Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1adbf7ed1b |
224
agent/agent_card.py
Normal file
224
agent/agent_card.py
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
"""A2A Agent Card — publish capabilities for fleet discovery.
|
||||||
|
|
||||||
|
Each fleet agent publishes an A2A-compliant agent card describing its capabilities.
|
||||||
|
Standard discovery endpoint: /.well-known/agent-card.json
|
||||||
|
|
||||||
|
Issue #819: feat: A2A agent card — publish capabilities for fleet discovery
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
def _plain_text_only() -> List[str]:
    """Return a fresh ``["text/plain"]`` list for use as a field default."""
    return ["text/plain"]


@dataclass
class AgentSkill:
    """One skill entry advertised on an agent card.

    Only ``id`` and ``name`` are required. The remaining fields default to
    empty values, and both mode lists default to plain text.
    """

    id: str
    name: str
    description: str = ""
    tags: List[str] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)
    # Factories (not shared literals) so every instance owns its own lists.
    input_modes: List[str] = field(default_factory=_plain_text_only)
    output_modes: List[str] = field(default_factory=_plain_text_only)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AgentCapabilities:
    """Feature flags advertised on an agent card.

    Defaults: streaming on, push notifications off, state transition
    history on.
    """

    streaming: bool = field(default=True)
    push_notifications: bool = field(default=False)
    state_transition_history: bool = field(default=True)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class AgentCard:
    """A2A-compliant agent card.

    Attributes use Python ``snake_case``; :meth:`to_dict` converts the
    multi-word keys to the ``camelCase`` names used on the wire.
    """

    name: str
    description: str
    url: str
    version: str = "1.0.0"
    capabilities: AgentCapabilities = field(default_factory=AgentCapabilities)
    skills: List[AgentSkill] = field(default_factory=list)
    default_input_modes: List[str] = field(default_factory=lambda: ["text/plain", "application/json"])
    default_output_modes: List[str] = field(default_factory=lambda: ["text/plain", "application/json"])
    metadata: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _rename_keys(mapping: Any, renames: Dict[str, str]) -> None:
        """Rename keys of *mapping* in place; ignore non-dicts and absent keys."""
        if not isinstance(mapping, dict):
            return
        for old_key, new_key in renames.items():
            if old_key in mapping:
                mapping[new_key] = mapping.pop(old_key)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dict using A2A field names.

        Besides the top-level ``defaultInputModes`` / ``defaultOutputModes``,
        the nested capability flags and per-skill mode lists are renamed too,
        so no snake_case multi-word key is left in the published card.

        Returns:
            A new dict (deep copy via ``dataclasses.asdict``).
        """
        card = asdict(self)

        self._rename_keys(card, {
            "default_input_modes": "defaultInputModes",
            "default_output_modes": "defaultOutputModes",
        })
        self._rename_keys(card.get("capabilities"), {
            "push_notifications": "pushNotifications",
            "state_transition_history": "stateTransitionHistory",
        })
        for skill in card.get("skills") or []:
            self._rename_keys(skill, {
                "input_modes": "inputModes",
                "output_modes": "outputModes",
            })

        return card

    def to_json(self) -> str:
        """Serialize :meth:`to_dict` to a JSON string indented by two spaces."""
        return json.dumps(self.to_dict(), indent=2)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_skills_from_directory(skills_dir: Path) -> List[AgentSkill]:
    """Collect skills from ``<skills_dir>/<skill>/SKILL.md`` YAML frontmatter.

    Scanning is best-effort: anything unreadable is skipped rather than
    raised, so a single broken skill cannot prevent the card from being built.

    Args:
        skills_dir: Directory whose immediate sub-directories are skills.

    Returns:
        One ``AgentSkill`` per sub-directory whose ``SKILL.md`` starts with a
        ``---`` delimited frontmatter block, ordered by path. The skill id is
        the directory name. ``name`` falls back to the directory name,
        ``description`` is truncated to 200 characters, and ``tags`` is kept
        only when it is a list. Returns an empty list when ``skills_dir`` is
        not a directory or PyYAML is unavailable.
    """
    skills: List[AgentSkill] = []

    # Imported once, up front: without PyYAML no frontmatter can be read.
    try:
        import yaml
    except ImportError:
        return skills

    try:
        if not skills_dir.is_dir():
            return skills
        # Sorted so the published card lists skills in a stable order.
        candidates = sorted(skills_dir.iterdir())
    except OSError:
        return skills

    for skill_dir in candidates:
        skill_md = skill_dir / "SKILL.md"
        try:
            if not skill_dir.is_dir() or not skill_md.is_file():
                continue
            content = skill_md.read_text(encoding="utf-8")
        except (OSError, UnicodeError):
            continue

        # Only files that open with a frontmatter block describe a skill.
        if not content.startswith("---"):
            continue
        parts = content.split("---", 2)
        if len(parts) < 3:
            continue

        try:
            metadata = yaml.safe_load(parts[1])
        except Exception:
            # Malformed YAML: still list the skill under its directory name.
            metadata = {}
        if not isinstance(metadata, dict):
            # Empty or scalar/list frontmatter carries no usable fields.
            metadata = {}

        name = metadata.get("name")
        desc = metadata.get("description")
        tags = metadata.get("tags")

        skills.append(AgentSkill(
            id=skill_dir.name,
            name=str(name) if name else skill_dir.name,
            description=str(desc)[:200] if desc else "",
            tags=[str(tag) for tag in tags] if isinstance(tags, list) else [],
        ))

    return skills
|
||||||
|
|
||||||
|
|
||||||
|
def validate_agent_card(card: AgentCard) -> List[str]:
    """Check an agent card and report every problem found.

    Checks, in order: non-empty ``name``, non-empty ``url``, each default
    input mode and each default output mode against a fixed set of MIME
    types, and a non-empty ``id`` on every skill.

    Returns:
        Human-readable error messages; an empty list means the card passed.
    """
    allowed = {"text/plain", "application/json", "image/png", "audio/wav"}
    problems: List[str] = []

    # Required top-level fields.
    for is_missing, message in (
        (not card.name, "name is required"),
        (not card.url, "url is required"),
    ):
        if is_missing:
            problems.append(message)

    # MIME types: inputs are reported before outputs.
    problems += [
        f"invalid input mode: {mode}"
        for mode in card.default_input_modes
        if mode not in allowed
    ]
    problems += [
        f"invalid output mode: {mode}"
        for mode in card.default_output_modes
        if mode not in allowed
    ]

    # Skills must carry an id; the name is echoed to identify the offender.
    problems += [
        f"skill missing id: {skill.name}"
        for skill in card.skills
        if not skill.id
    ]

    return problems
|
||||||
|
|
||||||
|
|
||||||
|
def build_agent_card(
    name: Optional[str] = None,
    description: Optional[str] = None,
    url: Optional[str] = None,
    version: Optional[str] = None,
    skills: Optional[List[AgentSkill]] = None,
    extra_skills: Optional[List[AgentSkill]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> AgentCard:
    """Build an A2A agent card from explicit arguments, environment and config.

    Resolution order (an empty value counts as "not set"):

    * ``name``: argument > ``HERMES_AGENT_NAME`` > ``"hermes"``
    * ``description``: argument > ``HERMES_AGENT_DESCRIPTION`` > ``"Sovereign AI agent"``
    * ``url``: argument > ``HERMES_AGENT_URL`` >
      ``http://localhost:<HERMES_API_PORT or 8642>``
    * ``version``: argument > ``HERMES_AGENT_VERSION`` > ``"1.0.0"``
    * ``metadata["model"]`` / ``metadata["provider"]``: value from
      ``hermes_cli.config.load_config()`` first, then ``HERMES_MODEL`` /
      ``HERMES_PROVIDER``. NOTE(review): this is config-before-env, the
      opposite of the other fields — confirm that is intended.

    Args:
        skills: Skills to publish. When ``None`` they are scanned from
            ``<hermes home>/skills``. The list is copied, never modified.
        extra_skills: Appended after ``skills``; entries whose id is already
            present (including earlier extras) are skipped.
        metadata: Extra entries merged over the generated model / provider /
            hostname metadata.

    Returns:
        The assembled ``AgentCard``.
    """
    # Config is optional: any failure to import or read it leaves both blank.
    config_model = ""
    config_provider = ""
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        model_cfg = cfg.get("model", {})
        if isinstance(model_cfg, dict):
            config_model = model_cfg.get("default", "")
            config_provider = model_cfg.get("provider", "")
        elif isinstance(model_cfg, str):
            config_model = model_cfg
    except Exception:
        pass

    # Resolve values with priority
    agent_name = name or os.environ.get("HERMES_AGENT_NAME", "") or "hermes"
    agent_desc = description or os.environ.get("HERMES_AGENT_DESCRIPTION", "") or "Sovereign AI agent"
    agent_url = url or os.environ.get("HERMES_AGENT_URL", "") or f"http://localhost:{os.environ.get('HERMES_API_PORT', '8642')}"
    agent_version = version or os.environ.get("HERMES_AGENT_VERSION", "") or "1.0.0"

    # Load skills
    if skills is not None:
        # Copy so appending extras below never mutates the caller's list.
        agent_skills = list(skills)
    else:
        from hermes_constants import get_hermes_home
        skills_dir = get_hermes_home() / "skills"
        agent_skills = _load_skills_from_directory(skills_dir)

    # Add extra skills, keeping the first occurrence of every id.
    if extra_skills:
        seen_ids = {skill.id for skill in agent_skills}
        for skill in extra_skills:
            if skill.id not in seen_ids:
                seen_ids.add(skill.id)
                agent_skills.append(skill)

    # Build metadata; caller-supplied entries win over generated ones.
    card_metadata = {
        "model": config_model or os.environ.get("HERMES_MODEL", ""),
        "provider": config_provider or os.environ.get("HERMES_PROVIDER", ""),
        "hostname": socket.gethostname(),
    }
    if metadata:
        card_metadata.update(metadata)

    # Build capabilities
    capabilities = AgentCapabilities(
        streaming=True,
        push_notifications=False,
        state_transition_history=True,
    )

    return AgentCard(
        name=agent_name,
        description=agent_desc,
        url=agent_url,
        version=agent_version,
        capabilities=capabilities,
        skills=agent_skills,
        metadata=card_metadata,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_agent_card_json() -> str:
    """Return the agent card serialized as JSON (for the HTTP endpoint).

    If building the full card raises, a minimal card (default name and
    description, localhost URL on ``HERMES_API_PORT`` or 8642) is returned
    instead so discovery keeps working.
    """
    try:
        return build_agent_card().to_json()
    except Exception:
        # Graceful fallback: a bare-bones card is better than no card.
        port = os.environ.get('HERMES_API_PORT', '8642')
        minimal = AgentCard(
            name="hermes",
            description="Sovereign AI agent",
            url=f"http://localhost:{port}",
        )
        return minimal.to_json()
|
||||||
@@ -1,288 +0,0 @@
|
|||||||
"""Gemma 4 tool calling hardening — parse, validate, benchmark.
|
|
||||||
|
|
||||||
Gemma 4 has native multimodal function calling but its output format
|
|
||||||
may differ from OpenAI/Claude. This module provides:
|
|
||||||
|
|
||||||
1. Gemma4ToolParser — robust parsing for Gemma 4's tool call format
|
|
||||||
2. Parallel tool call detection and splitting
|
|
||||||
3. Tool call success rate tracking and benchmarking
|
|
||||||
4. Fallback parsing strategies for malformed output
|
|
||||||
|
|
||||||
Usage:
|
|
||||||
from agent.gemma4_tool_hardening import Gemma4ToolParser
|
|
||||||
parser = Gemma4ToolParser()
|
|
||||||
tool_calls = parser.parse(response_text)
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class ToolCallAttempt:
    """Outcome of one attempt to parse tool calls from a model response."""

    raw_text: str
    parsed: bool
    tool_name: str
    arguments: dict
    error: str
    # One of "native", "json_block", "regex", "fallback".
    strategy: str
    timestamp: float = field(default=0.0)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Gemma4BenchmarkResult:
    """Running totals for a tool-call parsing benchmark."""

    total_calls: int = 0
    successful_parses: int = 0
    parallel_calls: int = 0
    strategies_used: Dict[str, int] = field(default_factory=dict)
    avg_parse_time_ms: float = 0.0
    success_rate: float = 0.0
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a summary dict of the benchmark.

        ``success_rate`` is rounded to 3 decimals and ``avg_parse_time_ms``
        to 2. Only the first ten errors are included; ``error_count`` gives
        the full number.
        """
        summary: dict = {}
        summary["total_calls"] = self.total_calls
        summary["successful_parses"] = self.successful_parses
        summary["parallel_calls"] = self.parallel_calls
        summary["success_rate"] = round(self.success_rate, 3)
        summary["strategies_used"] = self.strategies_used
        summary["avg_parse_time_ms"] = round(self.avg_parse_time_ms, 2)
        summary["error_count"] = len(self.errors)
        summary["errors"] = self.errors[:10]
        return summary
|
|
||||||
|
|
||||||
|
|
||||||
class Gemma4ToolParser:
    """Robust tool call parser for Gemma 4 output format.

    Tries multiple parsing strategies in order and returns the result of the
    first one that yields at least one call:

    1. Native OpenAI format (standard tool_calls)
    2. JSON code blocks (```json ... ```)
    3. Regex extraction (function_name + arguments patterns)
    4. Heuristic fallback (best-effort extraction)

    Every ``parse`` call updates the running statistics exposed through
    ``benchmark`` and appends a ``ToolCallAttempt`` to an internal log.
    """

    # Patterns for Gemma 4 tool call formats
    _JSON_BLOCK_PATTERN = re.compile(
        r'```(?:json)?\s*\n?(.*?)\n?```',
        re.DOTALL | re.IGNORECASE,
    )
    _FUNCTION_CALL_PATTERN = re.compile(
        r'(?:function|tool|call)[:\s]*(\w+)\s*\(\s*({.*?})\s*\)',
        re.DOTALL | re.IGNORECASE,
    )
    _GEMMA_INLINE_PATTERN = re.compile(
        r'\[(?:tool_call|function_call)\]\s*(\w+)\s*:\s*({.*?})',
        re.DOTALL | re.IGNORECASE,
    )
    # Shared decoder used to read a complete JSON object from a text offset.
    _DECODER = json.JSONDecoder()

    def __init__(self):
        self._attempts: List[ToolCallAttempt] = []
        self._benchmark = Gemma4BenchmarkResult()

    @property
    def benchmark(self) -> Gemma4BenchmarkResult:
        """Running benchmark statistics for this parser instance."""
        return self._benchmark

    def parse(self, response_text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Parse tool calls from model response using multiple strategies.

        Args:
            response_text: Raw model output. A non-string value is treated as
                empty text and recorded as a failed parse.
            expected_tools: Tool names the heuristic fallback may look for;
                without them the fallback strategy is skipped.

        Returns:
            List of tool call dicts in OpenAI format:
            [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}]
            or an empty list when no strategy found a call.
        """
        t0 = time.monotonic()
        bench = self._benchmark
        bench.total_calls += 1
        text = response_text if isinstance(response_text, str) else ""

        strategies = (
            ("native", lambda: self._try_native_parse(text)),
            ("json_block", lambda: self._try_json_block_parse(text, expected_tools)),
            ("regex", lambda: self._try_regex_parse(text)),
            ("fallback", lambda: self._try_heuristic_parse(text, expected_tools)),
        )
        for strategy, attempt in strategies:
            result = attempt()
            if not result:
                continue
            self._record_attempt(text, True, result, strategy)
            bench.successful_parses += 1
            # Counted for every strategy, not just the first two.
            if len(result) > 1:
                bench.parallel_calls += 1
            bench.strategies_used[strategy] = bench.strategies_used.get(strategy, 0) + 1
            self._update_timing(t0)
            return result

        # All strategies failed
        self._record_attempt(text, False, [], "none")
        bench.errors.append(f"Failed to parse: {text[:200]}")
        self._update_timing(t0)
        return []

    @staticmethod
    def _is_openai_call(item: Any) -> bool:
        """Return True if *item* is a dict whose "function" dict has a "name"."""
        if not isinstance(item, dict):
            return False
        function = item.get("function")
        return isinstance(function, dict) and "name" in function

    def _decode_object_at(self, text: str, start: int) -> Optional[Dict[str, Any]]:
        """Decode the JSON object starting at ``text[start]``.

        Unlike a non-greedy ``{.*?}`` capture this reads through nested
        braces and braces inside strings. Returns None when the text at that
        offset is not a valid JSON object.
        """
        try:
            value, _ = self._DECODER.raw_decode(text, start)
        except (ValueError, RecursionError):
            return None
        return value if isinstance(value, dict) else None

    def _call_from_mapping(self, data: Any, require_args: bool) -> Optional[Dict[str, Any]]:
        """Build a tool call from a ``{"name"/"function": ..., "arguments"/"args": ...}`` dict.

        Args:
            data: Decoded JSON value; anything but a dict yields None.
            require_args: When True the dict must carry an arguments key;
                when False missing arguments default to ``{}``.
        """
        if not isinstance(data, dict):
            return None
        # OpenAI-style nesting: {"function": {"name": ..., "arguments": ...}}
        if "name" not in data and isinstance(data.get("function"), dict):
            data = data["function"]

        name = data.get("name", data.get("function"))
        if not isinstance(name, str) or not name:
            return None

        if "arguments" in data:
            args = data["arguments"]
        elif "args" in data:
            args = data["args"]
        elif require_args:
            return None
        else:
            args = {}
        return self._to_openai_format(name, args)

    def _try_native_parse(self, text: str) -> List[Dict[str, Any]]:
        """Try parsing standard OpenAI tool_calls JSON.

        Accepts either ``{"tool_calls": [...]}`` or a bare list. Every entry
        must be a dict with a ``function`` dict containing ``name``;
        otherwise the text is left for the other strategies.
        """
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, RecursionError):
            return []

        if isinstance(data, dict):
            data = data.get("tool_calls")
        if not isinstance(data, list) or not data:
            return []
        if all(self._is_openai_call(item) for item in data):
            return data
        return []

    def _try_json_block_parse(self, text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Extract tool calls from JSON code blocks.

        A block holding a single object must name its arguments
        (``arguments`` or ``args``); objects inside a list default to empty
        arguments. ``expected_tools`` is accepted for signature parity with
        the heuristic strategy and is currently not used for filtering.
        """
        calls = []
        for block in self._JSON_BLOCK_PATTERN.findall(text):
            try:
                data = json.loads(block.strip())
            except (json.JSONDecodeError, RecursionError):
                continue

            if isinstance(data, dict):
                candidates = [(data, True)]
            elif isinstance(data, list):
                candidates = [(item, False) for item in data]
            else:
                continue

            for item, require_args in candidates:
                call = self._call_from_mapping(item, require_args)
                if call is not None:
                    calls.append(call)
        return calls

    def _try_regex_parse(self, text: str) -> List[Dict[str, Any]]:
        """Extract tool calls using regex patterns.

        The patterns locate the tool name and the opening brace; the
        arguments are then decoded from that brace so nested objects survive.
        """
        calls = []
        # Order matters: "name({...})" matches first, then "[tool_call] name: {...}".
        for pattern in (self._FUNCTION_CALL_PATTERN, self._GEMMA_INLINE_PATTERN):
            for match in pattern.finditer(text):
                args = self._decode_object_at(text, match.start(2))
                if args is not None:
                    calls.append(self._to_openai_format(match.group(1), args))
        return calls

    def _try_heuristic_parse(self, text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Best-effort heuristic extraction.

        For each expected tool name, finds the first occurrence followed by
        ``(`` or ``:`` and a decodable JSON object. At most one call per tool
        is returned.
        """
        if not expected_tools:
            return []

        calls = []
        for tool_name in expected_tools:
            # The lookahead leaves match.end() on the opening brace.
            pattern = re.compile(
                rf'{re.escape(tool_name)}\s*[\(:]\s*(?=\{{)',
                re.IGNORECASE,
            )
            for match in pattern.finditer(text):
                args = self._decode_object_at(text, match.end())
                if args is not None:
                    calls.append(self._to_openai_format(tool_name, args))
                    break
        return calls

    def _to_openai_format(self, name: str, arguments: Any) -> Dict[str, Any]:
        """Convert to OpenAI tool call format.

        ``arguments`` is always emitted as a string: strings pass through
        unchanged, None becomes ``"{}"`` and any other value is
        JSON-encoded (so lists are no longer rendered as a Python repr).
        """
        import uuid

        if isinstance(arguments, str):
            args_str = arguments
        elif arguments is None:
            args_str = "{}"
        else:
            try:
                args_str = json.dumps(arguments)
            except (TypeError, ValueError):
                args_str = str(arguments)

        return {
            "id": f"call_{uuid.uuid4().hex[:24]}",
            "type": "function",
            "function": {
                "name": name,
                "arguments": args_str,
            },
        }

    def _record_attempt(self, text: str, success: bool, result: list, strategy: str):
        """Append a ToolCallAttempt describing the first call in *result*.

        The raw text is truncated to 500 characters. The arguments of the
        first call are decoded when they are a JSON object string; otherwise
        an empty dict is stored.
        """
        first = result[0] if result else None
        function = first.get("function") if isinstance(first, dict) else None
        if not isinstance(function, dict):
            function = {}

        arguments = function.get("arguments")
        if isinstance(arguments, str):
            try:
                arguments = json.loads(arguments)
            except (ValueError, RecursionError):
                arguments = {}
        if not isinstance(arguments, dict):
            arguments = {}

        self._attempts.append(ToolCallAttempt(
            raw_text=text[:500],
            parsed=success,
            tool_name=function.get("name", ""),
            arguments=arguments,
            error="" if success else "parse failed",
            strategy=strategy,
            timestamp=time.time(),
        ))

    def _update_timing(self, t0: float):
        """Fold the time elapsed since *t0* into the running average and refresh the success rate."""
        elapsed = (time.monotonic() - t0) * 1000
        n = self._benchmark.total_calls
        if n <= 0:
            # Nothing has been counted yet; avoid dividing by zero.
            return
        self._benchmark.avg_parse_time_ms = (
            (self._benchmark.avg_parse_time_ms * (n - 1) + elapsed) / n
        )
        self._benchmark.success_rate = self._benchmark.successful_parses / n

    def format_report(self) -> str:
        """Format benchmark report.

        Strategies are listed by descending use count; at most five errors
        are shown, each cut to 100 characters.
        """
        b = self._benchmark
        lines = [
            "Gemma 4 Tool Calling Benchmark",
            "=" * 40,
            f"Total attempts: {b.total_calls}",
            f"Successful parses: {b.successful_parses}",
            f"Success rate: {b.success_rate:.1%}",
            f"Parallel calls: {b.parallel_calls}",
            f"Avg parse time: {b.avg_parse_time_ms:.2f}ms",
            "",
            "Strategies used:",
        ]
        for strategy, count in sorted(b.strategies_used.items(), key=lambda x: -x[1]):
            lines.append(f" {strategy}: {count}")

        if b.errors:
            lines.append("")
            lines.append(f"Errors ({len(b.errors)}):")
            for err in b.errors[:5]:
                lines.append(f" {err[:100]}")

        return "\n".join(lines)
|
|
||||||
132
tests/test_agent_card.py
Normal file
132
tests/test_agent_card.py
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
"""Tests for A2A agent card — Issue #819."""
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from agent.agent_card import (
|
||||||
|
AgentSkill, AgentCapabilities, AgentCard,
|
||||||
|
validate_agent_card, build_agent_card, get_agent_card_json,
|
||||||
|
_load_skills_from_directory
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentSkill:
    """Construction checks for AgentSkill."""

    def test_creation(self):
        """Keyword construction stores the id and the given tags."""
        coding = AgentSkill(id="code", name="Code", tags=["python"])

        assert coding.id == "code"
        assert "python" in coding.tags
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentCapabilities:
    """Default-value checks for AgentCapabilities."""

    def test_defaults(self):
        """A bare instance has streaming and history on, push notifications off."""
        caps = AgentCapabilities()

        # Identity checks: the flags must be real booleans, not merely truthy.
        assert caps.streaming is True
        assert caps.push_notifications is False
        assert caps.state_transition_history is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestAgentCard:
    """Serialization checks for AgentCard."""

    @staticmethod
    def _make_card():
        """Build the minimal card shared by the tests in this class."""
        return AgentCard(name="timmy", description="test", url="http://localhost:8642")

    def test_to_dict(self):
        """The dict form keeps the name and exposes the camelCase mode key."""
        as_dict = self._make_card().to_dict()

        assert as_dict["name"] == "timmy"
        assert "defaultInputModes" in as_dict

    def test_to_json(self):
        """The JSON form round-trips through json.loads."""
        decoded = json.loads(self._make_card().to_json())

        assert decoded["name"] == "timmy"
|
||||||
|
|
||||||
|
|
||||||
|
class TestValidation:
    """Checks for validate_agent_card."""

    def test_valid_card(self):
        """A card with name, description and url produces no errors."""
        problems = validate_agent_card(
            AgentCard(name="timmy", description="test", url="http://localhost:8642")
        )
        assert problems == []

    def test_missing_name(self):
        """An empty name is reported."""
        problems = validate_agent_card(
            AgentCard(name="", description="test", url="http://localhost:8642")
        )
        assert any("name" in message for message in problems)

    def test_missing_url(self):
        """An empty url is reported."""
        problems = validate_agent_card(
            AgentCard(name="timmy", description="test", url="")
        )
        assert any("url" in message for message in problems)

    def test_invalid_input_mode(self):
        """An input mode outside the accepted set is reported."""
        bad_mode_card = AgentCard(
            name="timmy",
            description="test",
            url="http://localhost:8642",
            default_input_modes=["invalid/mode"],
        )
        problems = validate_agent_card(bad_mode_card)
        assert any("invalid input mode" in message for message in problems)

    def test_skill_missing_id(self):
        """A skill with an empty id is reported."""
        nameless_id_card = AgentCard(
            name="timmy",
            description="test",
            url="http://localhost:8642",
            skills=[AgentSkill(id="", name="test")],
        )
        problems = validate_agent_card(nameless_id_card)
        assert any("skill missing id" in message for message in problems)
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildAgentCard:
    """Checks for build_agent_card."""

    def test_builds_valid_card(self):
        """The default build has a name and url and passes validation."""
        built = build_agent_card()

        assert built.name
        assert built.url
        assert validate_agent_card(built) == []

    def test_explicit_params_override(self):
        """Explicit name and description are used as given."""
        built = build_agent_card(name="custom", description="custom desc")

        assert (built.name, built.description) == ("custom", "custom desc")

    def test_extra_skills(self):
        """Skills passed as extra_skills appear on the card."""
        built = build_agent_card(extra_skills=[AgentSkill(id="extra", name="Extra")])

        assert "extra" in [skill.id for skill in built.skills]
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetAgentCardJson:
    """Checks for get_agent_card_json."""

    def test_returns_valid_json(self):
        """The endpoint payload is JSON containing a name."""
        parsed = json.loads(get_agent_card_json())
        assert "name" in parsed

    def test_graceful_fallback(self, monkeypatch):
        """When building the card fails, a minimal card is still returned."""
        import agent.agent_card as agent_card_module

        def _boom(*args, **kwargs):
            raise RuntimeError("simulated card build failure")

        # Patch the module global that get_agent_card_json looks up at call
        # time, so the except branch is actually exercised.
        monkeypatch.setattr(agent_card_module, "build_agent_card", _boom)

        parsed = json.loads(get_agent_card_json())
        assert parsed["name"] == "hermes"
        assert parsed["url"].startswith("http://localhost:")
|
||||||
|
|
||||||
|
|
||||||
|
class TestLoadSkills:
    """Checks for _load_skills_from_directory."""

    def test_empty_dir(self, tmp_path):
        """A path that does not exist yields no skills."""
        missing = tmp_path / "nonexistent"
        assert len(_load_skills_from_directory(missing)) == 0

    def test_parses_skill_md(self, tmp_path):
        """Name and tags are read from the SKILL.md frontmatter."""
        skill_md = tmp_path / "test-skill" / "SKILL.md"
        skill_md.parent.mkdir()
        skill_md.write_text("""---
name: Test Skill
description: A test skill
tags:
  - test
  - example
---
Content here
""")

        found = _load_skills_from_directory(tmp_path)

        assert len(found) == 1
        only_skill = found[0]
        assert only_skill.name == "Test Skill"
        assert "test" in only_skill.tags
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import pytest

    # Propagate pytest's result so running this file directly exits non-zero
    # when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||||
@@ -1,94 +0,0 @@
|
|||||||
"""Tests for Gemma 4 tool calling hardening."""
|
|
||||||
|
|
||||||
import json
|
|
||||||
import pytest
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
||||||
|
|
||||||
from agent.gemma4_tool_hardening import Gemma4ToolParser, Gemma4BenchmarkResult
|
|
||||||
|
|
||||||
|
|
||||||
class TestNativeParse:
    """Responses that are already OpenAI-style JSON."""

    def test_standard_tool_calls(self):
        """A {"tool_calls": [...]} object is returned as-is."""
        payload = {"tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "read_file", "arguments": '{"path": "test.py"}'}}]}

        calls = Gemma4ToolParser().parse(json.dumps(payload))

        assert len(calls) == 1
        assert calls[0]["function"]["name"] == "read_file"

    def test_list_format(self):
        """A bare list of function calls is accepted too."""
        payload = [{"id": "c1", "type": "function", "function": {"name": "terminal", "arguments": '{"command": "ls"}'}}]

        calls = Gemma4ToolParser().parse(json.dumps(payload))

        assert len(calls) == 1
|
|
||||||
|
|
||||||
|
|
||||||
class TestJsonBlockParse:
    """Tool calls wrapped in fenced JSON code blocks."""

    def test_json_code_block(self):
        """One fenced object yields one call with its name."""
        response = 'Here is the tool call:\n```json\n{"name": "read_file", "arguments": {"path": "test.py"}}\n```'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 1
        assert calls[0]["function"]["name"] == "read_file"

    def test_multiple_json_blocks(self):
        """Two fenced objects yield two calls."""
        response = '```json\n{"name": "read_file", "arguments": {"path": "a.py"}}\n```\n```json\n{"name": "read_file", "arguments": {"path": "b.py"}}\n```'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 2

    def test_list_in_json_block(self):
        """A fenced list with one entry yields one call."""
        response = '```json\n[{"name": "terminal", "arguments": {"command": "ls"}}]\n```'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 1
|
|
||||||
|
|
||||||
|
|
||||||
class TestRegexParse:
    """Tool calls embedded in free text."""

    def test_function_call_pattern(self):
        """``call name({...})`` in prose is extracted."""
        response = 'I will call read_file({"path": "test.py"}) now.'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 1
        assert calls[0]["function"]["name"] == "read_file"

    def test_gemma_inline_pattern(self):
        """``[tool_call] name: {...}`` is extracted."""
        response = '[tool_call] terminal: {"command": "pwd"}'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 1
|
|
||||||
|
|
||||||
|
|
||||||
class TestHeuristicParse:
    """Fallback extraction driven by expected tool names."""

    def test_heuristic_with_expected_tools(self):
        """A known tool name next to JSON arguments is picked up."""
        response = 'Calling read_file({"path": "config.yaml"}) now'

        calls = Gemma4ToolParser().parse(response, expected_tools=["read_file"])

        assert len(calls) == 1

    def test_heuristic_without_expected_tools(self):
        """Loose JSON with no tool name and no hints yields nothing."""
        response = 'Some text with {"key": "value"} but no tool name'

        calls = Gemma4ToolParser().parse(response)

        assert len(calls) == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestBenchmark:
    """Benchmark bookkeeping and reporting."""

    def test_benchmark_counts(self):
        """Two successes and one failure give 3 calls and a 2/3 success rate."""
        parser = Gemma4ToolParser()
        responses = (
            json.dumps({"tool_calls": [{"id": "1", "type": "function", "function": {"name": "x", "arguments": "{}"}}]}),
            '```json\n{"name": "y", "arguments": {}}\n```',
            'no tool call here',
        )
        for response in responses:
            parser.parse(response)

        stats = parser.benchmark
        assert stats.total_calls == 3
        assert stats.successful_parses == 2
        assert abs(stats.success_rate - 2/3) < 0.01

    def test_report_format(self):
        """The report carries its title and the strategy that was used."""
        parser = Gemma4ToolParser()
        parser.parse(json.dumps({"tool_calls": [{"id": "1", "type": "function", "function": {"name": "x", "arguments": "{}"}}]}))

        report = parser.format_report()

        assert "Gemma 4 Tool Calling Benchmark" in report
        assert "native" in report
|
|
||||||
Reference in New Issue
Block a user