Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1adbf7ed1b |
224
agent/agent_card.py
Normal file
224
agent/agent_card.py
Normal file
@@ -0,0 +1,224 @@
|
||||
"""A2A Agent Card — publish capabilities for fleet discovery.
|
||||
|
||||
Each fleet agent publishes an A2A-compliant agent card describing its capabilities.
|
||||
Standard discovery endpoint: /.well-known/agent-card.json
|
||||
|
||||
Issue #819: feat: A2A agent card — publish capabilities for fleet discovery
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import socket
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class AgentSkill:
    """One capability entry listed on an agent card.

    Only ``id`` and ``name`` must be supplied. The remaining fields default
    to empty values, and both mode lists default to a single ``text/plain``
    entry.
    """

    # Required identity fields.
    id: str
    name: str

    # Optional descriptive fields.
    description: str = ""
    tags: List[str] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)

    # Accepted / produced content types; each instance gets its own list.
    input_modes: List[str] = field(default_factory=lambda: ["text/plain"])
    output_modes: List[str] = field(default_factory=lambda: ["text/plain"])
|
||||
|
||||
|
||||
@dataclass
class AgentCapabilities:
    """Feature flags the agent reports on its card.

    Defaults: streaming and state-transition history are on, push
    notifications are off.
    """

    streaming: bool = True
    push_notifications: bool = False
    state_transition_history: bool = True
|
||||
|
||||
|
||||
@dataclass
class AgentCard:
    """A2A-compliant agent card.

    Holds the agent's identity (``name``, ``description``, ``url``,
    ``version``), its capability flags, its skills, the default content
    types it accepts and produces, and free-form ``metadata``.

    Attributes are snake_case in Python; :meth:`to_dict` converts the
    multi-word keys to the camelCase names used on the wire.
    """

    name: str
    description: str
    url: str
    version: str = "1.0.0"
    capabilities: AgentCapabilities = field(default_factory=AgentCapabilities)
    skills: List[AgentSkill] = field(default_factory=list)
    default_input_modes: List[str] = field(default_factory=lambda: ["text/plain", "application/json"])
    default_output_modes: List[str] = field(default_factory=lambda: ["text/plain", "application/json"])
    metadata: Dict[str, Any] = field(default_factory=dict)

    @staticmethod
    def _rename_key(mapping: Any, old: str, new: str) -> None:
        """Move ``mapping[old]`` to ``mapping[new]`` if ``mapping`` is a dict containing ``old``."""
        if isinstance(mapping, dict) and old in mapping:
            mapping[new] = mapping.pop(old)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dict with A2A (camelCase) key names.

        Returns:
            A deep copy of the card produced by ``dataclasses.asdict``, with
            the snake_case keys of the card, its capabilities, and each skill
            renamed to their camelCase equivalents.
        """
        card = asdict(self)
        rename = self._rename_key

        # Top-level card fields.
        rename(card, "default_input_modes", "defaultInputModes")
        rename(card, "default_output_modes", "defaultOutputModes")

        # Nested capability flags: previously left in snake_case.
        capabilities = card.get("capabilities")
        rename(capabilities, "push_notifications", "pushNotifications")
        rename(capabilities, "state_transition_history", "stateTransitionHistory")

        # Nested per-skill mode lists: previously left in snake_case.
        skills = card.get("skills")
        if isinstance(skills, list):
            for skill in skills:
                rename(skill, "input_modes", "inputModes")
                rename(skill, "output_modes", "outputModes")

        return card

    def to_json(self) -> str:
        """Serialize the card to an indented JSON string."""
        return json.dumps(self.to_dict(), indent=2)
|
||||
|
||||
|
||||
def _load_skills_from_directory(skills_dir: Path) -> List[AgentSkill]:
    """Build skills from the ``SKILL.md`` frontmatter found under ``skills_dir``.

    Each immediate subdirectory containing a ``SKILL.md`` that starts with a
    ``---`` delimited YAML frontmatter block becomes one ``AgentSkill``. The
    directory name is the skill id; ``name``, ``description`` (truncated to
    200 characters) and ``tags`` come from the frontmatter when present.

    Args:
        skills_dir: Directory whose subdirectories are scanned.

    Returns:
        Skills sorted by directory path. The list is empty when
        ``skills_dir`` is not a directory, cannot be listed, or PyYAML is
        not importable.
    """
    # is_dir() also covers "path exists but is a regular file", which would
    # otherwise make iterdir() raise.
    if not skills_dir.is_dir():
        return []

    try:
        import yaml
    except ImportError:
        # Without a YAML parser no frontmatter can be read.
        return []

    skills: List[AgentSkill] = []
    try:
        # Sorted so the published card is stable across runs and filesystems.
        candidates = sorted(entry for entry in skills_dir.iterdir() if entry.is_dir())
    except OSError:
        return skills

    for skill_dir in candidates:
        try:
            content = (skill_dir / "SKILL.md").read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable SKILL.md: not a skill directory.
            continue

        if not content.startswith("---"):
            continue
        parts = content.split("---", 2)
        if len(parts) < 3:
            continue

        try:
            metadata = yaml.safe_load(parts[1])
        except Exception:
            # Deliberately broad: malformed frontmatter must not hide the
            # skill, and the loader can raise more than its own error type.
            metadata = None
        if not isinstance(metadata, dict):
            # Empty, scalar or list frontmatter carries no usable fields.
            metadata = {}

        name = metadata.get("name") or skill_dir.name
        desc = metadata.get("description") or ""
        tags = metadata.get("tags")

        skills.append(AgentSkill(
            id=skill_dir.name,
            name=str(name),
            description=str(desc)[:200],
            tags=[str(tag) for tag in tags] if isinstance(tags, list) else [],
        ))

    return skills
|
||||
|
||||
|
||||
def validate_agent_card(card: AgentCard) -> List[str]:
    """Check an agent card and report every problem found.

    Checks that ``name`` and ``url`` are non-empty, that every default
    input/output mode is one of the accepted content types, and that every
    skill has a non-empty ``id``.

    Returns:
        Human-readable error messages in check order; empty when the card
        passes all checks.
    """
    problems: List[str] = []

    # Required top-level fields.
    for value, message in ((card.name, "name is required"), (card.url, "url is required")):
        if not value:
            problems.append(message)

    # Content types accepted for default input/output modes.
    allowed = {"text/plain", "application/json", "image/png", "audio/wav"}
    problems.extend(
        f"invalid input mode: {mode}"
        for mode in card.default_input_modes
        if mode not in allowed
    )
    problems.extend(
        f"invalid output mode: {mode}"
        for mode in card.default_output_modes
        if mode not in allowed
    )

    # Every skill needs an id.
    problems.extend(
        f"skill missing id: {skill.name}"
        for skill in card.skills
        if not skill.id
    )

    return problems
|
||||
|
||||
|
||||
def build_agent_card(
    name: Optional[str] = None,
    description: Optional[str] = None,
    url: Optional[str] = None,
    version: Optional[str] = None,
    skills: Optional[List[AgentSkill]] = None,
    extra_skills: Optional[List[AgentSkill]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> AgentCard:
    """Build an A2A agent card from config and environment.

    Priority: explicit params > env vars > config.yaml > defaults

    Args:
        name: Agent name; falls back to ``HERMES_AGENT_NAME`` then ``"hermes"``.
        description: Agent description; falls back to
            ``HERMES_AGENT_DESCRIPTION`` then ``"Sovereign AI agent"``.
        url: Agent URL; falls back to ``HERMES_AGENT_URL`` then
            ``http://localhost:<HERMES_API_PORT or 8642>``.
        version: Card version; falls back to ``HERMES_AGENT_VERSION`` then
            ``"1.0.0"``.
        skills: Skills to publish. When ``None`` they are loaded from the
            ``skills`` directory under the Hermes home. The list passed in
            is copied, never modified.
        extra_skills: Skills appended after ``skills``; entries whose id is
            already present are skipped.
        metadata: Extra metadata merged over the generated ``model``,
            ``provider`` and ``hostname`` entries.

    Returns:
        The assembled ``AgentCard``.
    """
    # Load model/provider from config; used only as a fallback below.
    config_model = ""
    config_provider = ""
    try:
        from hermes_cli.config import load_config

        cfg = load_config()
        model_cfg = cfg.get("model", {})
        if isinstance(model_cfg, dict):
            config_model = model_cfg.get("default") or ""
            config_provider = model_cfg.get("provider") or ""
        elif isinstance(model_cfg, str):
            config_model = model_cfg
    except Exception:
        # Best effort: a missing or broken config must not prevent the card
        # from being built from params, env vars and defaults.
        pass

    env = os.environ.get

    # Resolve values with priority
    agent_name = name or env("HERMES_AGENT_NAME", "") or "hermes"
    agent_desc = description or env("HERMES_AGENT_DESCRIPTION", "") or "Sovereign AI agent"
    agent_url = url or env("HERMES_AGENT_URL", "") or f"http://localhost:{env('HERMES_API_PORT', '8642')}"
    agent_version = version or env("HERMES_AGENT_VERSION", "") or "1.0.0"

    # Load skills
    if skills is not None:
        # Copy so appending extra skills never mutates the caller's list.
        agent_skills = list(skills)
    else:
        from hermes_constants import get_hermes_home

        agent_skills = _load_skills_from_directory(get_hermes_home() / "skills")

    # Add extra skills, skipping ids already present (including duplicates
    # within extra_skills itself).
    if extra_skills:
        seen_ids = {skill.id for skill in agent_skills}
        for skill in extra_skills:
            if skill.id not in seen_ids:
                seen_ids.add(skill.id)
                agent_skills.append(skill)

    # Build metadata: env vars take precedence over config, matching the
    # documented priority order.
    card_metadata = {
        "model": env("HERMES_MODEL", "") or config_model,
        "provider": env("HERMES_PROVIDER", "") or config_provider,
        "hostname": socket.gethostname(),
    }
    if metadata:
        card_metadata.update(metadata)

    # Build capabilities
    capabilities = AgentCapabilities(
        streaming=True,
        push_notifications=False,
        state_transition_history=True,
    )

    return AgentCard(
        name=agent_name,
        description=agent_desc,
        url=agent_url,
        version=agent_version,
        capabilities=capabilities,
        skills=agent_skills,
        metadata=card_metadata,
    )
|
||||
|
||||
|
||||
def get_agent_card_json() -> str:
    """Get agent card as JSON string (for HTTP endpoint).

    Returns:
        The JSON for the card built by ``build_agent_card()``. If building
        or serializing it raises, the failure is logged and the JSON of a
        minimal default card is returned instead, so discovery keeps working.
    """
    try:
        return build_agent_card().to_json()
    except Exception:
        # Deliberately broad: this backs a discovery endpoint and must always
        # answer. Log the cause instead of discarding it.
        import logging

        logging.getLogger(__name__).warning(
            "Failed to build agent card; serving minimal fallback card",
            exc_info=True,
        )
        fallback = AgentCard(
            name="hermes",
            description="Sovereign AI agent",
            url=f"http://localhost:{os.environ.get('HERMES_API_PORT', '8642')}",
        )
        return fallback.to_json()
|
||||
@@ -1,288 +0,0 @@
|
||||
"""Gemma 4 tool calling hardening — parse, validate, benchmark.
|
||||
|
||||
Gemma 4 has native multimodal function calling but its output format
|
||||
may differ from OpenAI/Claude. This module provides:
|
||||
|
||||
1. Gemma4ToolParser — robust parsing for Gemma 4's tool call format
|
||||
2. Parallel tool call detection and splitting
|
||||
3. Tool call success rate tracking and benchmarking
|
||||
4. Fallback parsing strategies for malformed output
|
||||
|
||||
Usage:
|
||||
from agent.gemma4_tool_hardening import Gemma4ToolParser
|
||||
parser = Gemma4ToolParser()
|
||||
tool_calls = parser.parse(response_text)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
@dataclass
class ToolCallAttempt:
    """Outcome of one attempt to parse tool calls from a response."""

    # Input and result of the attempt.
    raw_text: str
    parsed: bool
    tool_name: str
    arguments: dict
    error: str

    # Which strategy was used: "native", "json_block", "regex", "fallback"
    strategy: str

    # When the attempt was recorded; 0.0 if not supplied.
    timestamp: float = 0.0
|
||||
|
||||
|
||||
@dataclass
class Gemma4BenchmarkResult:
    """Running totals for a tool-call parsing benchmark."""

    total_calls: int = 0
    successful_parses: int = 0
    parallel_calls: int = 0
    strategies_used: Dict[str, int] = field(default_factory=dict)
    avg_parse_time_ms: float = 0.0
    success_rate: float = 0.0
    errors: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a summary dict with rounded rates and at most ten errors."""
        summary = {
            counter: getattr(self, counter)
            for counter in ("total_calls", "successful_parses", "parallel_calls")
        }
        summary["success_rate"] = round(self.success_rate, 3)
        summary["strategies_used"] = self.strategies_used
        summary["avg_parse_time_ms"] = round(self.avg_parse_time_ms, 2)
        # The full error count is reported, but only the first ten messages.
        summary["error_count"] = len(self.errors)
        summary["errors"] = self.errors[:10]
        return summary
|
||||
|
||||
|
||||
class Gemma4ToolParser:
    """Robust tool call parser for Gemma 4 output format.

    Tries multiple parsing strategies in order:
    1. Native OpenAI format (standard tool_calls)
    2. JSON code blocks (```json ... ```)
    3. Regex extraction (function_name + arguments patterns)
    4. Heuristic fallback (best-effort extraction)

    The first strategy that yields at least one call wins. Every ``parse``
    call updates the benchmark counters and appends a ``ToolCallAttempt``.
    """

    # Patterns for Gemma 4 tool call formats
    _JSON_BLOCK_PATTERN = re.compile(
        r'```(?:json)?\s*\n?(.*?)\n?```',
        re.DOTALL | re.IGNORECASE,
    )
    # "<keyword> name(" followed by "{". The keyword must be a whole word so
    # that e.g. "tool_search(" is not read as keyword "tool" + name "_search".
    # The JSON object itself is decoded separately (see _iter_json_args).
    _FUNCTION_CALL_PATTERN = re.compile(
        r'\b(?:function_call|tool_call|function|tool|call)\b[:\s]*(\w+)\s*\(\s*(?=\{)',
        re.IGNORECASE,
    )
    # "[tool_call] name:" followed by "{".
    _GEMMA_INLINE_PATTERN = re.compile(
        r'\[(?:tool_call|function_call)\]\s*(\w+)\s*:\s*(?=\{)',
        re.IGNORECASE,
    )
    _CLOSE_PAREN_PATTERN = re.compile(r'\s*\)')
    _JSON_DECODER = json.JSONDecoder()

    def __init__(self):
        self._attempts: List[ToolCallAttempt] = []
        self._benchmark = Gemma4BenchmarkResult()

    @property
    def benchmark(self) -> Gemma4BenchmarkResult:
        """Counters accumulated over all ``parse`` calls on this parser."""
        return self._benchmark

    def parse(self, response_text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Parse tool calls from model response using multiple strategies.

        Args:
            response_text: Raw model output. A non-string value is treated
                as empty text.
            expected_tools: Tool names the heuristic fallback may look for.

        Returns list of tool call dicts in OpenAI format:
        [{"id": "...", "type": "function", "function": {"name": "...", "arguments": "..."}}]
        An empty list means no strategy found a call.
        """
        text = response_text if isinstance(response_text, str) else ""
        t0 = time.monotonic()
        stats = self._benchmark
        stats.total_calls += 1

        strategies = (
            ("native", lambda: self._try_native_parse(text)),
            ("json_block", lambda: self._try_json_block_parse(text, expected_tools)),
            ("regex", lambda: self._try_regex_parse(text)),
            ("fallback", lambda: self._try_heuristic_parse(text, expected_tools)),
        )
        for strategy, attempt in strategies:
            result = attempt()
            if not result:
                continue
            self._record_attempt(text, True, result, strategy)
            stats.successful_parses += 1
            # Counted for every strategy, not only native/json_block.
            if len(result) > 1:
                stats.parallel_calls += 1
            stats.strategies_used[strategy] = stats.strategies_used.get(strategy, 0) + 1
            self._update_timing(t0)
            return result

        # All strategies failed
        self._record_attempt(text, False, [], "none")
        stats.errors.append(f"Failed to parse: {text[:200]}")
        self._update_timing(t0)
        return []

    def _try_native_parse(self, text: str) -> List[Dict[str, Any]]:
        """Try parsing standard OpenAI tool_calls JSON.

        Accepts either ``{"tool_calls": [...]}`` or a bare list. Every item
        must be a dict with a dict-valued ``"function"`` entry; anything else
        is rejected so later bookkeeping can rely on that shape.
        """
        try:
            data = json.loads(text)
        except (json.JSONDecodeError, TypeError):
            return []

        candidates = data.get("tool_calls") if isinstance(data, dict) else data
        if not isinstance(candidates, list):
            return []
        if all(isinstance(item, dict) and isinstance(item.get("function"), dict) for item in candidates):
            return candidates
        return []

    def _try_json_block_parse(self, text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Extract tool calls from JSON code blocks.

        A single object needs a string ``"name"`` (or ``"function"``) and an
        ``"arguments"`` key. In a list, each object needs a string ``"name"``;
        arguments come from ``"arguments"``, then ``"args"``, then ``{}``.
        ``expected_tools`` is accepted for signature parity with the other
        strategies and is not used here.
        """
        calls = []
        for block in self._JSON_BLOCK_PATTERN.findall(text):
            try:
                data = json.loads(block.strip())
            except json.JSONDecodeError:
                continue

            if isinstance(data, dict):
                if "arguments" not in data:
                    continue
                name = data["name"] if "name" in data else data.get("function")
                if isinstance(name, str) and name:
                    calls.append(self._to_openai_format(name, data["arguments"]))
            elif isinstance(data, list):
                for item in data:
                    if not isinstance(item, dict):
                        continue
                    name = item.get("name")
                    if isinstance(name, str) and name:
                        args = item.get("arguments", item.get("args", {}))
                        calls.append(self._to_openai_format(name, args))
        return calls

    def _iter_json_args(self, pattern, text: str, require_close_paren: bool = False):
        """Yield ``(match, args)`` for each ``pattern`` hit followed by a JSON object.

        ``pattern`` must end exactly where the object's ``{`` starts. The
        object is decoded with a real JSON decoder, so nested objects and
        braces inside strings are handled. Scanning resumes after the decoded
        object, so text inside it is never re-scanned.
        """
        pos = 0
        while True:
            match = pattern.search(text, pos)
            if match is None:
                return
            pos = match.end()
            try:
                args, end = self._JSON_DECODER.raw_decode(text, match.end())
            except json.JSONDecodeError:
                continue
            if require_close_paren and self._CLOSE_PAREN_PATTERN.match(text, end) is None:
                continue
            pos = end
            yield match, args

    def _try_regex_parse(self, text: str) -> List[Dict[str, Any]]:
        """Extract tool calls using regex patterns."""
        calls = []

        # Pattern: <keyword> function_name({...})
        for match, args in self._iter_json_args(self._FUNCTION_CALL_PATTERN, text, require_close_paren=True):
            calls.append(self._to_openai_format(match.group(1), args))

        # Pattern: [tool_call] name: {...}
        for match, args in self._iter_json_args(self._GEMMA_INLINE_PATTERN, text):
            calls.append(self._to_openai_format(match.group(1), args))

        return calls

    def _try_heuristic_parse(self, text: str, expected_tools: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """Best-effort heuristic extraction.

        For each expected tool name, takes the first occurrence of
        ``name(`` or ``name:`` that is followed by a decodable JSON object.
        Returns nothing when ``expected_tools`` is empty or ``None``.
        """
        if not expected_tools:
            return []

        calls = []
        for tool_name in expected_tools:
            if not isinstance(tool_name, str) or not tool_name:
                continue
            # Look for the tool name (as a whole word) right before JSON content
            pattern = re.compile(
                rf'(?<!\w){re.escape(tool_name)}\s*[\(:]\s*(?=\{{)',
                re.IGNORECASE,
            )
            for _match, args in self._iter_json_args(pattern, text):
                calls.append(self._to_openai_format(tool_name, args))
                break

        return calls

    def _to_openai_format(self, name: str, arguments: Any) -> Dict[str, Any]:
        """Convert to OpenAI tool call format.

        A string ``arguments`` value is passed through unchanged; any other
        value is JSON-encoded so the result is always a JSON string.
        """
        import uuid

        args_str = arguments if isinstance(arguments, str) else json.dumps(arguments)
        return {
            "id": f"call_{uuid.uuid4().hex[:24]}",
            "type": "function",
            "function": {
                "name": name,
                "arguments": args_str,
            },
        }

    def _record_attempt(self, text: str, success: bool, result: list, strategy: str):
        """Append a ``ToolCallAttempt`` describing the first call in ``result``."""
        first = result[0] if result else None
        function = first.get("function") if isinstance(first, dict) else None
        if not isinstance(function, dict):
            function = {}

        name = function.get("name")
        arguments = function.get("arguments")
        if isinstance(arguments, str):
            # OpenAI format stores arguments as a JSON string.
            try:
                arguments = json.loads(arguments)
            except json.JSONDecodeError:
                arguments = None

        self._attempts.append(ToolCallAttempt(
            raw_text=text[:500],
            parsed=success,
            tool_name=name if isinstance(name, str) else "",
            arguments=arguments if isinstance(arguments, dict) else {},
            error="" if success else "parse failed",
            strategy=strategy,
            timestamp=time.time(),
        ))

    def _update_timing(self, t0: float):
        """Fold this call's elapsed time into the running average and refresh the success rate."""
        elapsed = (time.monotonic() - t0) * 1000
        n = self._benchmark.total_calls
        if n <= 0:
            # Nothing counted yet; avoid dividing by zero.
            return
        self._benchmark.avg_parse_time_ms = (
            (self._benchmark.avg_parse_time_ms * (n - 1) + elapsed) / n
        )
        self._benchmark.success_rate = self._benchmark.successful_parses / n

    def format_report(self) -> str:
        """Format benchmark report."""
        b = self._benchmark
        lines = [
            "Gemma 4 Tool Calling Benchmark",
            "=" * 40,
            f"Total attempts: {b.total_calls}",
            f"Successful parses: {b.successful_parses}",
            f"Success rate: {b.success_rate:.1%}",
            f"Parallel calls: {b.parallel_calls}",
            f"Avg parse time: {b.avg_parse_time_ms:.2f}ms",
            "",
            "Strategies used:",
        ]
        # Most-used strategy first.
        for strategy, count in sorted(b.strategies_used.items(), key=lambda x: -x[1]):
            lines.append(f" {strategy}: {count}")

        if b.errors:
            lines.append("")
            lines.append(f"Errors ({len(b.errors)}):")
            for err in b.errors[:5]:
                lines.append(f" {err[:100]}")

        return "\n".join(lines)
|
||||
132
tests/test_agent_card.py
Normal file
132
tests/test_agent_card.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""Tests for A2A agent card — Issue #819."""
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from agent.agent_card import (
|
||||
AgentSkill, AgentCapabilities, AgentCard,
|
||||
validate_agent_card, build_agent_card, get_agent_card_json,
|
||||
_load_skills_from_directory
|
||||
)
|
||||
|
||||
|
||||
class TestAgentSkill:
    """Construction of AgentSkill."""

    def test_creation(self):
        """Explicit id and tags are stored on the instance."""
        created = AgentSkill(id="code", name="Code", tags=["python"])
        assert created.id == "code"
        assert "python" in created.tags
|
||||
|
||||
|
||||
class TestAgentCapabilities:
    """Default values of AgentCapabilities."""

    def test_defaults(self):
        """Streaming is on and push notifications are off by default."""
        caps = AgentCapabilities()
        # Identity checks: the flags must be real booleans, not merely
        # truthy/falsy values that happen to compare equal.
        assert caps.streaming is True
        assert caps.push_notifications is False
|
||||
|
||||
|
||||
class TestAgentCard:
    """Serialization of AgentCard."""

    @staticmethod
    def _make_card():
        """Minimal card shared by the serialization tests."""
        return AgentCard(name="timmy", description="test", url="http://localhost:8642")

    def test_to_dict(self):
        """to_dict keeps the name and exposes the camelCase input-mode key."""
        as_dict = self._make_card().to_dict()
        assert as_dict["name"] == "timmy"
        assert "defaultInputModes" in as_dict

    def test_to_json(self):
        """to_json emits JSON that round-trips the name."""
        round_tripped = json.loads(self._make_card().to_json())
        assert round_tripped["name"] == "timmy"
|
||||
|
||||
|
||||
class TestValidation:
    """Error reporting of validate_agent_card."""

    @staticmethod
    def _problems(**overrides):
        """Validate a baseline card with ``overrides`` applied and return the errors."""
        fields = {"name": "timmy", "description": "test", "url": "http://localhost:8642"}
        fields.update(overrides)
        return validate_agent_card(AgentCard(**fields))

    def test_valid_card(self):
        """A complete card yields no errors."""
        assert len(self._problems()) == 0

    def test_missing_name(self):
        """An empty name is reported."""
        assert any("name" in problem for problem in self._problems(name=""))

    def test_missing_url(self):
        """An empty url is reported."""
        assert any("url" in problem for problem in self._problems(url=""))

    def test_invalid_input_mode(self):
        """An input mode outside the accepted set is reported."""
        found = self._problems(default_input_modes=["invalid/mode"])
        assert any("invalid input mode" in problem for problem in found)

    def test_skill_missing_id(self):
        """A skill with an empty id is reported."""
        found = self._problems(skills=[AgentSkill(id="", name="test")])
        assert any("skill missing id" in problem for problem in found)
|
||||
|
||||
|
||||
class TestBuildAgentCard:
    """Card assembly through build_agent_card."""

    def test_builds_valid_card(self):
        """With no arguments the built card has a name, a url, and validates cleanly."""
        built = build_agent_card()
        assert built.name
        assert built.url
        assert len(validate_agent_card(built)) == 0

    def test_explicit_params_override(self):
        """Explicit name and description win over every other source."""
        built = build_agent_card(name="custom", description="custom desc")
        assert built.name == "custom"
        assert built.description == "custom desc"

    def test_extra_skills(self):
        """Skills passed as extra_skills appear on the card."""
        built = build_agent_card(extra_skills=[AgentSkill(id="extra", name="Extra")])
        assert any(skill.id == "extra" for skill in built.skills)
|
||||
|
||||
|
||||
class TestGetAgentCardJson:
    """JSON output of get_agent_card_json, including its failure fallback."""

    def test_returns_valid_json(self):
        """The normal path returns parseable JSON containing a name."""
        parsed = json.loads(get_agent_card_json())
        assert "name" in parsed

    def test_graceful_fallback(self, monkeypatch):
        """When building the card fails, the minimal default card is returned."""

        def _boom(*args, **kwargs):
            raise RuntimeError("card build failed")

        # Patch the name inside the module under test so the failure path is
        # actually exercised; previously this test only repeated the happy path.
        monkeypatch.setattr("agent.agent_card.build_agent_card", _boom)

        parsed = json.loads(get_agent_card_json())
        assert parsed["name"] == "hermes"
        assert parsed["description"] == "Sovereign AI agent"
        assert parsed["url"].startswith("http://localhost:")
|
||||
|
||||
|
||||
class TestLoadSkills:
    """Directory scanning in _load_skills_from_directory."""

    def test_empty_dir(self, tmp_path):
        """A path that does not exist produces no skills."""
        missing = tmp_path / "nonexistent"
        assert len(_load_skills_from_directory(missing)) == 0

    def test_parses_skill_md(self, tmp_path):
        """Name and tags are read from the SKILL.md frontmatter."""
        target = tmp_path / "test-skill"
        target.mkdir()
        document = "\n".join([
            "---",
            "name: Test Skill",
            "description: A test skill",
            "tags:",
            "- test",
            "- example",
            "---",
            "Content here",
            "",
        ])
        (target / "SKILL.md").write_text(document)

        loaded = _load_skills_from_directory(tmp_path)
        assert len(loaded) == 1
        only = loaded[0]
        assert only.name == "Test Skill"
        assert "test" in only.tags
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly as a script: hand this file to
    # pytest with verbose output.
    import pytest

    pytest.main([__file__, "-v"])
|
||||
@@ -1,94 +0,0 @@
|
||||
"""Tests for Gemma 4 tool calling hardening."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||
|
||||
from agent.gemma4_tool_hardening import Gemma4ToolParser, Gemma4BenchmarkResult
|
||||
|
||||
|
||||
class TestNativeParse:
    """Native OpenAI-format payloads."""

    def test_standard_tool_calls(self):
        """A ``{"tool_calls": [...]}`` object is returned as-is."""
        call = {
            "id": "call_1",
            "type": "function",
            "function": {"name": "read_file", "arguments": '{"path": "test.py"}'},
        }
        parsed = Gemma4ToolParser().parse(json.dumps({"tool_calls": [call]}))
        assert len(parsed) == 1
        assert parsed[0]["function"]["name"] == "read_file"

    def test_list_format(self):
        """A bare list of tool-call objects is accepted too."""
        call = {
            "id": "c1",
            "type": "function",
            "function": {"name": "terminal", "arguments": '{"command": "ls"}'},
        }
        parsed = Gemma4ToolParser().parse(json.dumps([call]))
        assert len(parsed) == 1
|
||||
|
||||
|
||||
class TestJsonBlockParse:
    """Tool calls embedded in fenced JSON code blocks."""

    def test_json_code_block(self):
        """A single fenced object after prose yields one call."""
        response = 'Here is the tool call:\n```json\n{"name": "read_file", "arguments": {"path": "test.py"}}\n```'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 1
        assert parsed[0]["function"]["name"] == "read_file"

    def test_multiple_json_blocks(self):
        """Two fenced objects yield two calls."""
        response = '```json\n{"name": "read_file", "arguments": {"path": "a.py"}}\n```\n```json\n{"name": "read_file", "arguments": {"path": "b.py"}}\n```'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 2

    def test_list_in_json_block(self):
        """A fenced list with one object yields one call."""
        response = '```json\n[{"name": "terminal", "arguments": {"command": "ls"}}]\n```'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 1
|
||||
|
||||
|
||||
class TestRegexParse:
    """Inline call syntaxes handled by the regex strategy."""

    def test_function_call_pattern(self):
        """``call name({...})`` inside prose is recognised."""
        response = 'I will call read_file({"path": "test.py"}) now.'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 1
        assert parsed[0]["function"]["name"] == "read_file"

    def test_gemma_inline_pattern(self):
        """``[tool_call] name: {...}`` is recognised."""
        response = '[tool_call] terminal: {"command": "pwd"}'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 1
|
||||
|
||||
|
||||
class TestHeuristicParse:
    """Fallback extraction driven by expected tool names."""

    def test_heuristic_with_expected_tools(self):
        """A known tool name followed by JSON arguments is picked up."""
        response = 'Calling read_file({"path": "config.yaml"}) now'
        parsed = Gemma4ToolParser().parse(response, expected_tools=["read_file"])
        assert len(parsed) == 1

    def test_heuristic_without_expected_tools(self):
        """Loose JSON with no tool name and no expected tools yields nothing."""
        response = 'Some text with {"key": "value"} but no tool name'
        parsed = Gemma4ToolParser().parse(response)
        assert len(parsed) == 0
|
||||
|
||||
|
||||
class TestBenchmark:
    """Benchmark counters and the text report."""

    # Native-format payload that always parses successfully.
    _NATIVE_OK = json.dumps(
        {"tool_calls": [{"id": "1", "type": "function", "function": {"name": "x", "arguments": "{}"}}]}
    )

    def test_benchmark_counts(self):
        """Two successes and one failure give 3 calls, 2 parses, rate ~2/3."""
        parser = Gemma4ToolParser()
        for response in (
            self._NATIVE_OK,
            '```json\n{"name": "y", "arguments": {}}\n```',
            'no tool call here',
        ):
            parser.parse(response)

        stats = parser.benchmark
        assert stats.total_calls == 3
        assert stats.successful_parses == 2
        assert abs(stats.success_rate - 2/3) < 0.01

    def test_report_format(self):
        """The report carries its title and names the strategy that was used."""
        parser = Gemma4ToolParser()
        parser.parse(self._NATIVE_OK)
        text = parser.format_report()
        assert "Gemma 4 Tool Calling Benchmark" in text
        assert "native" in text
|
||||
Reference in New Issue
Block a user