feat: add Anthropic transport abstraction slice (#951)

- add transport registry, shared transport dataclasses, and AnthropicTransport
- add normalize_anthropic_response_v2 as the bridge from existing Anthropic normalization to shared transport types
- extend Anthropic stop-reason mapping for refusal and model_context_window_exceeded
- add targeted transport and v2 normalization regression tests

Closes #951
Refs #949
2026-04-22 11:20:20 -04:00
13 changed files with 863 additions and 607 deletions
--- a/agent/a2a_mtls.py
+++ b/agent/a2a_mtls.py
@@ -29,8 +29,6 @@ import logging
 import os
 import ssl
 import threading
-import time
-import uuid
 from http.server import BaseHTTPRequestHandler, HTTPServer
 from pathlib import Path
 from typing import Any, Callable, Dict, Optional
@@ -443,244 +441,3 @@ class A2AMTLSClient:
    def post(self, url: str, json: Optional[Dict[str, Any]] = None, **kwargs: Any) -> Dict[str, Any]:
        data = (__import__("json").dumps(json).encode() if json is not None else None)
        return self._request("POST", url, data=data, **kwargs)
-
-
-# ---------------------------------------------------------------------------
-# Structured A2A task delegation over mTLS
-# ---------------------------------------------------------------------------
-
-_TERMINAL_TASK_STATES = {"completed", "failed", "canceled", "rejected"}
-
-
-def _iso_now() -> str:
-    return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
-
-
-def _task_status(state: str, message: str) -> Dict[str, Any]:
-    return {
-        "state": state,
-        "message": message,
-        "timestamp": _iso_now(),
-    }
-
-
-def _coerce_artifact(result: Any) -> Dict[str, Any]:
-    if isinstance(result, dict):
-        if "text" in result:
-            return result
-        if "artifact" in result and isinstance(result["artifact"], dict):
-            return result["artifact"]
-    return {"text": str(result)}
-
-
-def _build_task_record(task_id: str, task: str, requester: Optional[str], metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
-    return {
-        "taskId": task_id,
-        "task": task,
-        "requester": requester,
-        "metadata": metadata or {},
-        "artifacts": [],
-        "status": _task_status("submitted", "Task submitted"),
-    }
-
-
-def _default_agent_card(host: str, port: int) -> Dict[str, Any]:
-    base_url = f"https://{host}:{port}"
-    try:
-        from agent.agent_card import build_agent_card
-        from dataclasses import asdict
-
-        card = asdict(build_agent_card())
-    except Exception as exc:  # pragma: no cover - fallback only exercised when card build breaks
-        logger.warning("Falling back to minimal agent card: %s", exc)
-        card = {
-            "name": os.environ.get("HERMES_AGENT_NAME", "hermes"),
-            "description": "Hermes A2A task server",
-            "version": "unknown",
-        }
-    card["url"] = base_url
-    card["a2aTaskEndpoint"] = f"{base_url}/a2a/rpc"
-    return card
-
-
-def _default_local_hermes_executor(task_payload: Dict[str, Any]) -> Dict[str, Any]:
-    task_text = str(task_payload.get("task", "")).strip()
-    if not task_text:
-        return {"text": ""}
-    from run_agent import AIAgent
-
-    agent = AIAgent(quiet_mode=True)
-    result = agent.chat(task_text)
-    return {
-        "text": result,
-        "metadata": {"executor": "local-hermes"},
-    }
-
-
-class A2ATaskServer:
-    """JSON-RPC A2A task server running over the routing mTLS server."""
-
-    def __init__(
-        self,
-        cert: str | Path,
-        key: str | Path,
-        ca: str | Path,
-        host: str = "127.0.0.1",
-        port: int = 9443,
-        executor: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
-        card_factory: Optional[Callable[[], Dict[str, Any]]] = None,
-    ) -> None:
-        self.host = host
-        self.port = port
-        self._server = A2AMTLSServer(cert=cert, key=key, ca=ca, host=host, port=port)
-        self._executor = executor or _default_local_hermes_executor
-        self._card_factory = card_factory or (lambda: _default_agent_card(self.host, self.port))
-        self._tasks: Dict[str, Dict[str, Any]] = {}
-        self._lock = threading.Lock()
-        self._server.add_route("/.well-known/agent-card.json", self._handle_agent_card)
-        self._server.add_route("/agent-card.json", self._handle_agent_card)
-        self._server.add_route("/a2a/rpc", self._handle_rpc)
-
-    def __enter__(self) -> "A2ATaskServer":
-        self.start()
-        return self
-
-    def __exit__(self, *_: Any) -> None:
-        self.stop()
-
-    def start(self) -> None:
-        self._server.start()
-
-    def stop(self) -> None:
-        self._server.stop()
-
-    def _handle_agent_card(self, payload: Dict[str, Any], *, peer_cn: str | None = None) -> Dict[str, Any]:
-        return self._card_factory()
-
-    def _handle_rpc(self, payload: Dict[str, Any], *, peer_cn: str | None = None) -> Dict[str, Any]:
-        req_id = payload.get("id")
-        if payload.get("jsonrpc") != "2.0":
-            return {"jsonrpc": "2.0", "id": req_id, "error": {"code": -32600, "message": "invalid jsonrpc version"}}
-
-        method = payload.get("method")
-        params = payload.get("params") or {}
-        try:
-            if method == "tasks/send":
-                result = self._rpc_send_task(params, peer_cn=peer_cn)
-            elif method == "tasks/get":
-                result = self._rpc_get_task(params)
-            else:
-                return {"jsonrpc": "2.0", "id": req_id, "error": {"code": -32601, "message": f"unknown method: {method}"}}
-        except Exception as exc:
-            logger.exception("A2A task RPC failed: %s", exc)
-            return {"jsonrpc": "2.0", "id": req_id, "error": {"code": -32000, "message": str(exc)}}
-        return {"jsonrpc": "2.0", "id": req_id, "result": result}
-
-    def _rpc_send_task(self, params: Dict[str, Any], *, peer_cn: str | None = None) -> Dict[str, Any]:
-        task_text = str(params.get("task", "")).strip()
-        if not task_text:
-            raise ValueError("task is required")
-        task_id = params.get("taskId") or uuid.uuid4().hex
-        requester = params.get("requester") or peer_cn
-        metadata = dict(params.get("metadata") or {})
-        if peer_cn:
-            metadata.setdefault("peer_cn", peer_cn)
-        record = _build_task_record(task_id, task_text, requester, metadata)
-        with self._lock:
-            self._tasks[task_id] = record
-        worker = threading.Thread(target=self._run_task, args=(task_id,), daemon=True, name=f"a2a-task-{task_id[:8]}")
-        worker.start()
-        return self._copy_task(task_id)
-
-    def _rpc_get_task(self, params: Dict[str, Any]) -> Dict[str, Any]:
-        task_id = str(params.get("taskId", "")).strip()
-        if not task_id:
-            raise ValueError("taskId is required")
-        return self._copy_task(task_id)
-
-    def _copy_task(self, task_id: str) -> Dict[str, Any]:
-        with self._lock:
-            if task_id not in self._tasks:
-                raise KeyError(f"unknown taskId: {task_id}")
-            return json.loads(json.dumps(self._tasks[task_id]))
-
-    def _run_task(self, task_id: str) -> None:
-        with self._lock:
-            task = self._tasks[task_id]
-            task["status"] = _task_status("working", "Task is running")
-            task_payload = {
-                "taskId": task["taskId"],
-                "task": task["task"],
-                "requester": task.get("requester"),
-                "metadata": dict(task.get("metadata") or {}),
-            }
-        try:
-            result = self._executor(task_payload)
-            artifact = _coerce_artifact(result)
-            with self._lock:
-                task = self._tasks[task_id]
-                task["artifacts"] = [artifact]
-                task["status"] = _task_status("completed", "Task completed")
-        except Exception as exc:
-            with self._lock:
-                task = self._tasks[task_id]
-                task["status"] = _task_status("failed", f"Task failed: {exc}")
-
-
-class A2ATaskClient(A2AMTLSClient):
-    """Client helper for A2A JSON-RPC task send/get flows."""
-
-    def discover_card(self, base_url: str) -> Dict[str, Any]:
-        return self.get(f"{base_url.rstrip('/')}/.well-known/agent-card.json")
-
-    def _rpc_call(self, base_url: str, method: str, params: Dict[str, Any]) -> Dict[str, Any]:
-        payload = {
-            "jsonrpc": "2.0",
-            "id": uuid.uuid4().hex,
-            "method": method,
-            "params": params,
-        }
-        response = self.post(f"{base_url.rstrip('/')}/a2a/rpc", json=payload)
-        if "error" in response:
-            error = response["error"]
-            raise RuntimeError(error.get("message") or str(error))
-        return response.get("result", {})
-
-    def send_task(
-        self,
-        base_url: str,
-        *,
-        task: str,
-        requester: str | None = None,
-        metadata: Optional[Dict[str, Any]] = None,
-    ) -> Dict[str, Any]:
-        return self._rpc_call(
-            base_url,
-            "tasks/send",
-            {
-                "task": task,
-                "requester": requester,
-                "metadata": metadata or {},
-            },
-        )
-
-    def get_task(self, base_url: str, task_id: str) -> Dict[str, Any]:
-        return self._rpc_call(base_url, "tasks/get", {"taskId": task_id})
-
-    def wait_for_task(
-        self,
-        base_url: str,
-        task_id: str,
-        *,
-        timeout: float = 30.0,
-        poll_interval: float = 0.5,
-    ) -> Dict[str, Any]:
-        deadline = time.monotonic() + timeout
-        while True:
-            task = self.get_task(base_url, task_id)
-            state = str(((task.get("status") or {}).get("state") or "")).lower()
-            if state in _TERMINAL_TASK_STATES:
-                return task
-            if time.monotonic() >= deadline:
-                raise TimeoutError(f"Timed out waiting for task {task_id}")
-            time.sleep(poll_interval)
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1396,6 +1396,8 @@ def normalize_anthropic_response(
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
    }
    finish_reason = stop_reason_map.get(response.stop_reason, "stop")

@@ -1409,3 +1411,42 @@ def normalize_anthropic_response(
        ),
        finish_reason,
    )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Normalize Anthropic response to NormalizedResponse.
+
+    Wraps the existing normalize_anthropic_response() and maps its output
+    to the shared transport types. This allows incremental migration
+    without disturbing the legacy call sites.
+    """
+    from agent.transports.types import NormalizedResponse, build_tool_call
+
+    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    tool_calls = None
+    if assistant_msg.tool_calls:
+        tool_calls = [
+            build_tool_call(
+                id=tc.id,
+                name=tc.function.name,
+                arguments=tc.function.arguments,
+            )
+            for tc in assistant_msg.tool_calls
+        ]
+
+    provider_data = {}
+    if getattr(assistant_msg, "reasoning_details", None):
+        provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+    return NormalizedResponse(
+        content=assistant_msg.content,
+        tool_calls=tool_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(assistant_msg, "reasoning", None),
+        usage=None,
+        provider_data=provider_data or None,
+    )
--- a/agent/transports/__init__.py
+++ b/agent/transports/__init__.py
@@ -0,0 +1,57 @@
+"""Transport layer types and registry for provider response normalization.
+
+Usage:
+    from agent.transports import get_transport
+    transport = get_transport("anthropic_messages")
+    result = transport.normalize_response(raw_response)
+"""
+
+from agent.transports.types import (  # noqa: F401
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)
+
+_REGISTRY: dict = {}
+
+
+def register_transport(api_mode: str, transport_cls: type) -> None:
+    """Register a transport class for an api_mode string."""
+    _REGISTRY[api_mode] = transport_cls
+
+
+def get_transport(api_mode: str):
+    """Get a transport instance for the given api_mode.
+
+    Returns None if no transport is registered for this api_mode.
+    This allows gradual migration — call sites can check for None
+    and fall back to the legacy code path.
+    """
+    if not _REGISTRY:
+        _discover_transports()
+    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        return None
+    return cls()
+
+
+def _discover_transports() -> None:
+    """Import all transport modules to trigger auto-registration."""
+    try:
+        import agent.transports.anthropic  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.codex  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.chat_completions  # noqa: F401
+    except ImportError:
+        pass
+    try:
+        import agent.transports.bedrock  # noqa: F401
+    except ImportError:
+        pass
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -0,0 +1,95 @@
+"""Anthropic Messages API transport.
+
+Delegates to the existing adapter functions in agent/anthropic_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class AnthropicTransport(ProviderTransport):
+    """Transport for api_mode='anthropic_messages'."""
+
+    @property
+    def api_mode(self) -> str:
+        return "anthropic_messages"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        base_url = kwargs.get("base_url")
+        return convert_messages_to_anthropic(messages, base_url=base_url)
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        from agent.anthropic_adapter import convert_tools_to_anthropic
+
+        return convert_tools_to_anthropic(tools)
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        return build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=params.get("max_tokens", 16384),
+            reasoning_config=params.get("reasoning_config"),
+            tool_choice=params.get("tool_choice"),
+            is_oauth=params.get("is_oauth", False),
+            preserve_dots=params.get("preserve_dots", False),
+            context_length=params.get("context_length"),
+            base_url=params.get("base_url"),
+            fast_mode=params.get("fast_mode", False),
+        )
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        from agent.anthropic_adapter import normalize_anthropic_response_v2
+
+        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
+        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
+
+    def validate_response(self, response: Any) -> bool:
+        if response is None:
+            return False
+        content_blocks = getattr(response, "content", None)
+        if not isinstance(content_blocks, list):
+            return False
+        if not content_blocks:
+            return False
+        return True
+
+    def extract_cache_stats(self, response: Any):
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        cached = getattr(usage, "cache_read_input_tokens", 0) or 0
+        written = getattr(usage, "cache_creation_input_tokens", 0) or 0
+        if cached or written:
+            return {"cached_tokens": cached, "creation_tokens": written}
+        return None
+
+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        return self._STOP_REASON_MAP.get(raw_reason, "stop")
+
+
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("anthropic_messages", AnthropicTransport)
--- a/agent/transports/base.py
+++ b/agent/transports/base.py
@@ -0,0 +1,61 @@
+"""Abstract base for provider transports.
+
+A transport owns the data path for one api_mode:
+  convert_messages → convert_tools → build_kwargs → normalize_response
+
+It does NOT own: client construction, streaming, credential refresh,
+prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+from agent.transports.types import NormalizedResponse
+
+
+class ProviderTransport(ABC):
+    """Base class for provider-specific format conversion and normalization."""
+
+    @property
+    @abstractmethod
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles."""
+        ...
+
+    @abstractmethod
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI-format messages to provider-native format."""
+        ...
+
+    @abstractmethod
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI-format tool definitions to provider-native format."""
+        ...
+
+    @abstractmethod
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the complete provider kwargs dict."""
+        ...
+
+    @abstractmethod
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw provider response to the shared NormalizedResponse type."""
+        ...
+
+    def validate_response(self, response: Any) -> bool:
+        """Optional structural validation for raw responses."""
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Optional cache stats extraction."""
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Optional stop-reason mapping. Defaults to passthrough."""
+        return raw_reason
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@@ -0,0 +1,58 @@
+"""Shared types for normalized provider responses."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class ToolCall:
+    """A normalized tool call from any provider."""
+
+    id: Optional[str]
+    name: str
+    arguments: str
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+@dataclass
+class Usage:
+    """Token usage from an API response."""
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    cached_tokens: int = 0
+
+
+@dataclass
+class NormalizedResponse:
+    """Normalized API response from any provider."""
+
+    content: Optional[str]
+    tool_calls: Optional[List[ToolCall]]
+    finish_reason: str
+    reasoning: Optional[str] = None
+    usage: Optional[Usage] = None
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+def build_tool_call(
+    id: Optional[str],
+    name: str,
+    arguments: Any,
+    **provider_fields: Any,
+) -> ToolCall:
+    """Build a ToolCall, auto-serialising dict arguments."""
+    args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
+    provider_data = dict(provider_fields) if provider_fields else None
+    return ToolCall(id=id, name=name, arguments=args_str, provider_data=provider_data)
+
+
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+    """Translate a provider-specific stop reason to the normalized set."""
+    if reason is None:
+        return "stop"
+    return mapping.get(reason, "stop")
--- a/hermes_cli/a2a_cmd.py
+++ b/hermes_cli/a2a_cmd.py
@@ -1,132 +0,0 @@
-"""CLI helpers for A2A task delegation."""
-
-from __future__ import annotations
-
-import json
-import os
-import re
-import sys
-import time
-from pathlib import Path
-from typing import Any
-
-from agent.a2a_mtls import A2ATaskClient, A2ATaskServer
-from hermes_cli.config import get_hermes_home
-
-
-def _registry_path() -> Path:
-    return get_hermes_home() / "a2a_agents.json"
-
-
-def _default_identity_paths() -> tuple[str, str, str]:
-    hermes_home = get_hermes_home()
-    agent_name = os.environ.get("HERMES_AGENT_NAME", "hermes").lower()
-    cert = os.environ.get(
-        "HERMES_A2A_CERT",
-        str(hermes_home / "pki" / "agents" / agent_name / f"{agent_name}.crt"),
-    )
-    key = os.environ.get(
-        "HERMES_A2A_KEY",
-        str(hermes_home / "pki" / "agents" / agent_name / f"{agent_name}.key"),
-    )
-    ca = os.environ.get(
-        "HERMES_A2A_CA",
-        str(hermes_home / "pki" / "ca" / "fleet-ca.crt"),
-    )
-    return cert, key, ca
-
-
-def load_agent_registry(path: Path | None = None) -> dict[str, Any]:
-    registry_path = path or _registry_path()
-    if not registry_path.exists():
-        return {}
-    return json.loads(registry_path.read_text(encoding="utf-8"))
-
-
-def resolve_agent_url(agent: str, *, registry_path: Path | None = None) -> str:
-    key = re.sub(r"[^A-Za-z0-9]+", "_", agent).upper()
-    env_value = os.getenv(f"HERMES_A2A_{key}_URL")
-    if env_value:
-        return env_value
-
-    registry = load_agent_registry(registry_path)
-    entry = registry.get(agent)
-    if isinstance(entry, str) and entry:
-        return entry
-    if isinstance(entry, dict):
-        url = entry.get("url") or entry.get("base_url") or entry.get("card_url")
-        if url:
-            return str(url)
-    if agent.startswith("https://") or agent.startswith("http://"):
-        return agent
-    raise SystemExit(f"Unknown A2A agent '{agent}'. Set HERMES_A2A_{key}_URL or add it to {_registry_path()}.")
-
-
-def _print(data: dict[str, Any]) -> None:
-    print(json.dumps(data, indent=2, ensure_ascii=False))
-
-
-def cmd_send(args) -> None:
-    base_url = args.url or resolve_agent_url(args.agent)
-    cert, key, ca = args.cert, args.key, args.ca
-    if not (cert and key and ca):
-        cert, key, ca = _default_identity_paths()
-    client = A2ATaskClient(cert=cert, key=key, ca=ca)
-    card = client.discover_card(base_url)
-    task = client.send_task(
-        base_url,
-        task=args.task,
-        requester=args.requester,
-        metadata={"agent": args.agent},
-    )
-    if args.wait:
-        task = client.wait_for_task(
-            base_url,
-            task["taskId"],
-            timeout=args.timeout,
-            poll_interval=args.poll_interval,
-        )
-    _print({
-        "agent": args.agent,
-        "url": base_url,
-        "card": card,
-        "task": task,
-    })
-
-
-def cmd_status(args) -> None:
-    base_url = args.url or resolve_agent_url(args.agent)
-    cert, key, ca = args.cert, args.key, args.ca
-    if not (cert and key and ca):
-        cert, key, ca = _default_identity_paths()
-    client = A2ATaskClient(cert=cert, key=key, ca=ca)
-    task = client.get_task(base_url, args.task_id)
-    _print({"agent": args.agent, "url": base_url, "task": task})
-
-
-def cmd_serve(args) -> None:
-    cert, key, ca = args.cert, args.key, args.ca
-    if not (cert and key and ca):
-        cert, key, ca = _default_identity_paths()
-    server = A2ATaskServer(cert=cert, key=key, ca=ca, host=args.host, port=args.port)
-    server.start()
-    print(f"A2A task server listening on https://{args.host}:{args.port}")
-    try:
-        while True:
-            time.sleep(1)
-    except KeyboardInterrupt:
-        server.stop()
-
-
-def cmd_a2a(args) -> None:
-    command = getattr(args, "a2a_command", None) or "send"
-    if command == "send":
-        cmd_send(args)
-        return
-    if command == "status":
-        cmd_status(args)
-        return
-    if command == "serve":
-        cmd_serve(args)
-        return
-    raise SystemExit(f"Unknown a2a command: {command}")
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -173,13 +173,6 @@ from hermes_constants import OPENROUTER_BASE_URL
 logger = logging.getLogger(__name__)


-def cmd_a2a(args):
-    """Dispatch A2A CLI subcommands lazily to avoid heavy imports at startup."""
-    from hermes_cli.a2a_cmd import cmd_a2a as _cmd_a2a
-
-    return _cmd_a2a(args)
-
-
 def _relative_time(ts) -> str:
    """Format a timestamp as relative time (e.g., '2h ago', 'yesterday')."""
    if not ts:
@@ -4788,45 +4781,6 @@ For more help on a command:

    gateway_parser.set_defaults(func=cmd_gateway)
    
-    # =========================================================================
-    # a2a command
-    # =========================================================================
-    a2a_parser = subparsers.add_parser(
-        "a2a",
-        help="A2A task delegation over mutual TLS",
-        description="Send, inspect, and serve structured A2A tasks between Hermes agents",
-    )
-    a2a_subparsers = a2a_parser.add_subparsers(dest="a2a_command")
-
-    a2a_send = a2a_subparsers.add_parser("send", help="Send an A2A task to another agent")
-    a2a_send.add_argument("--agent", required=True, help="Agent alias or URL (for example: allegro)")
-    a2a_send.add_argument("--task", required=True, help="Task text to delegate")
-    a2a_send.add_argument("--url", help="Explicit base URL for the remote agent")
-    a2a_send.add_argument("--requester", default=None, help="Requester label included in task metadata")
-    a2a_send.add_argument("--wait", action="store_true", help="Poll until the task reaches a terminal state")
-    a2a_send.add_argument("--timeout", type=float, default=30.0, help="Wait timeout in seconds (default: 30)")
-    a2a_send.add_argument("--poll-interval", type=float, default=0.5, help="Polling interval in seconds while waiting (default: 0.5)")
-    a2a_send.add_argument("--cert", default=None, help="Client certificate path (defaults from HERMES_A2A_CERT)")
-    a2a_send.add_argument("--key", default=None, help="Client private key path (defaults from HERMES_A2A_KEY)")
-    a2a_send.add_argument("--ca", default=None, help="Fleet CA certificate path (defaults from HERMES_A2A_CA)")
-
-    a2a_status = a2a_subparsers.add_parser("status", help="Fetch the current status of an A2A task")
-    a2a_status.add_argument("--agent", required=True, help="Agent alias or URL (for example: allegro)")
-    a2a_status.add_argument("--task-id", required=True, help="Task identifier returned by a2a send")
-    a2a_status.add_argument("--url", help="Explicit base URL for the remote agent")
-    a2a_status.add_argument("--cert", default=None, help="Client certificate path (defaults from HERMES_A2A_CERT)")
-    a2a_status.add_argument("--key", default=None, help="Client private key path (defaults from HERMES_A2A_KEY)")
-    a2a_status.add_argument("--ca", default=None, help="Fleet CA certificate path (defaults from HERMES_A2A_CA)")
-
-    a2a_serve = a2a_subparsers.add_parser("serve", help="Run the local A2A task server")
-    a2a_serve.add_argument("--host", default=os.environ.get("HERMES_A2A_HOST", "127.0.0.1"), help="Bind host (default: HERMES_A2A_HOST or 127.0.0.1)")
-    a2a_serve.add_argument("--port", type=int, default=int(os.environ.get("HERMES_A2A_PORT", "9443")), help="Bind port (default: HERMES_A2A_PORT or 9443)")
-    a2a_serve.add_argument("--cert", default=None, help="Server certificate path (defaults from HERMES_A2A_CERT)")
-    a2a_serve.add_argument("--key", default=None, help="Server private key path (defaults from HERMES_A2A_KEY)")
-    a2a_serve.add_argument("--ca", default=None, help="Fleet CA certificate path (defaults from HERMES_A2A_CA)")
-
-    a2a_parser.set_defaults(func=cmd_a2a)
-    
    # =========================================================================
    # setup command
    # =========================================================================
--- a/tests/agent/test_a2a_mtls.py
+++ b/tests/agent/test_a2a_mtls.py
@@ -572,94 +572,3 @@ class TestA2AMTLSServerAndClient:

        assert not errors, f"Concurrent connection errors: {errors}"
        assert len(results) == 3
-
-
-@_requires_crypto
-class TestA2ATaskServerAndClient:
-    """Structured A2A task send/get flow over mTLS."""
-
-    @pytest.fixture(autouse=True)
-    def _pki(self, tmp_path):
-        ca_dir = tmp_path / "ca"
-        ca_dir.mkdir()
-        self.ca_crt, self.ca_key = _make_ca_keypair(ca_dir)
-        agent_dir = tmp_path / "agents"
-        agent_dir.mkdir()
-        self.srv_crt, self.srv_key = _make_agent_keypair(
-            agent_dir, "timmy", self.ca_crt, self.ca_key
-        )
-        self.cli_crt, self.cli_key = _make_agent_keypair(
-            agent_dir, "allegro", self.ca_crt, self.ca_key
-        )
-
-    @pytest.fixture()
-    def task_server(self):
-        from agent.a2a_mtls import A2ATaskServer
-
-        gate = threading.Event()
-
-        def analyze_executor(task: dict[str, object]) -> dict[str, object]:
-            gate.wait(timeout=2)
-            text = str(task.get("task", ""))
-            return {
-                "text": f"analysis:{text}",
-                "metadata": {"tool": "local-hermes-stub"},
-            }
-
-        port = _find_free_port()
-        server = A2ATaskServer(
-            cert=self.srv_crt,
-            key=self.srv_key,
-            ca=self.ca_crt,
-            host="127.0.0.1",
-            port=port,
-            executor=analyze_executor,
-        )
-        with server:
-            time.sleep(0.1)
-            yield server, port, gate
-
-    def test_task_send_get_and_completion_flow(self, task_server):
-        from agent.a2a_mtls import A2ATaskClient
-
-        server, port, gate = task_server
-        client = A2ATaskClient(cert=self.cli_crt, key=self.cli_key, ca=self.ca_crt)
-        base_url = f"https://127.0.0.1:{port}"
-
-        card = client.discover_card(base_url)
-        assert card["name"]
-
-        submitted = client.send_task(base_url, task="Analyze README.md", requester="timmy")
-        assert submitted["status"]["state"] in {"submitted", "working"}
-
-        in_flight = client.get_task(base_url, submitted["taskId"])
-        assert in_flight["status"]["state"] in {"submitted", "working"}
-
-        gate.set()
-        completed = client.wait_for_task(base_url, submitted["taskId"], timeout=5.0, poll_interval=0.05)
-        assert completed["status"]["state"] == "completed"
-        assert completed["artifacts"][0]["text"] == "analysis:Analyze README.md"
-
-    def test_failed_executor_marks_task_failed(self):
-        from agent.a2a_mtls import A2ATaskClient, A2ATaskServer
-
-        def failing_executor(task: dict[str, object]) -> dict[str, object]:
-            raise RuntimeError("boom")
-
-        port = _find_free_port()
-        server = A2ATaskServer(
-            cert=self.srv_crt,
-            key=self.srv_key,
-            ca=self.ca_crt,
-            host="127.0.0.1",
-            port=port,
-            executor=failing_executor,
-        )
-        with server:
-            time.sleep(0.1)
-            client = A2ATaskClient(cert=self.cli_crt, key=self.cli_key, ca=self.ca_crt)
-            base_url = f"https://127.0.0.1:{port}"
-            submitted = client.send_task(base_url, task="explode", requester="timmy")
-            failed = client.wait_for_task(base_url, submitted["taskId"], timeout=5.0, poll_interval=0.05)
-            assert failed["status"]["state"] == "failed"
-            assert "boom" in failed["status"]["message"]
--- a/tests/agent/test_anthropic_normalize_v2.py
+++ b/tests/agent/test_anthropic_normalize_v2.py
@@ -0,0 +1,213 @@
+"""Regression tests: normalize_anthropic_response_v2 vs v1.
+
+Constructs mock Anthropic responses and asserts that the v2 function
+(returning NormalizedResponse) produces identical field values to the
+original v1 function (returning SimpleNamespace + finish_reason).
+"""
+
+from types import SimpleNamespace
+
+import pytest
+
+from agent.anthropic_adapter import (
+    normalize_anthropic_response,
+    normalize_anthropic_response_v2,
+)
+from agent.transports.types import NormalizedResponse
+
+
+def _text_block(text: str) -> SimpleNamespace:  # stand-in for a "text" content block
+    return SimpleNamespace(type="text", text=text)
+
+
+def _thinking_block(thinking: str, signature: str = "sig_abc") -> SimpleNamespace:  # stand-in for a "thinking" block
+    return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
+
+
+def _tool_use_block(id: str, name: str, input: dict) -> SimpleNamespace:  # NOTE: id/input shadow builtins; they mirror the block's field names
+    return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
+
+
+def _response(content_blocks, stop_reason="end_turn"):
+    """Build a mock response carrying *content_blocks* and a fixed 10/5 token usage."""
+    usage = SimpleNamespace(input_tokens=10, output_tokens=5)
+    mock = SimpleNamespace(content=content_blocks, stop_reason=stop_reason)
+    mock.usage = usage
+    return mock
+
+
+class TestTextOnly:
+    """A response with one text block must normalize the same in v1 and v2."""
+
+    def setup_method(self):
+        self.resp = resp = _response([_text_block("Hello world")])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(resp)
+        self.v2 = normalize_anthropic_response_v2(resp)
+
+    def test_type(self):
+        assert isinstance(self.v2, NormalizedResponse)
+
+    def test_content_matches(self):
+        assert self.v1_msg.content == self.v2.content
+
+    def test_finish_reason_matches(self):
+        assert self.v1_finish == self.v2.finish_reason
+
+    def test_no_tool_calls(self):
+        assert self.v2.tool_calls is None and self.v1_msg.tool_calls is None
+
+    def test_no_reasoning(self):
+        assert self.v2.reasoning is None and self.v1_msg.reasoning is None
+
+
+class TestWithToolCalls:
+    """Text followed by two tool_use blocks: v2 tool calls must mirror v1's."""
+
+    def setup_method(self):
+        blocks = [
+            _text_block("I'll check that"),
+            _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
+            _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
+        ]
+        self.resp = _response(blocks, stop_reason="tool_use")
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_finish_reason(self):
+        assert self.v2.finish_reason == self.v1_finish == "tool_calls"
+
+    def test_tool_call_count(self):
+        assert len(self.v2.tool_calls) == len(self.v1_msg.tool_calls) == 2
+
+    def test_tool_call_ids_match(self):
+        for index in range(2):
+            new, old = self.v2.tool_calls[index], self.v1_msg.tool_calls[index]
+            assert new.id == old.id
+
+    def test_tool_call_names_match(self):
+        for index, name in enumerate(("terminal", "read_file")):
+            new, old = self.v2.tool_calls[index], self.v1_msg.tool_calls[index]
+            assert new.name == name
+            assert new.name == old.function.name
+
+    def test_tool_call_arguments_match(self):
+        for index in range(2):
+            new, old = self.v2.tool_calls[index], self.v1_msg.tool_calls[index]
+            assert new.arguments == old.function.arguments
+
+    def test_content_preserved(self):
+        assert self.v2.content == self.v1_msg.content
+        assert "check that" in self.v2.content
+
+
+class TestWithThinking:
+    """Thinking blocks surface as reasoning and stay out of the text content."""
+
+    def setup_method(self):
+        thought = _thinking_block("Let me think about this carefully...")
+        self.resp = _response([thought, _text_block("The answer is 42.")])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_reasoning_matches(self):
+        assert self.v2.reasoning == self.v1_msg.reasoning
+        assert "think about this" in self.v2.reasoning
+
+    def test_reasoning_details_in_provider_data(self):
+        extras = self.v2.provider_data
+        v2_details = extras.get("reasoning_details") if extras else None
+        assert self.v1_msg.reasoning_details is not None
+        assert v2_details is not None
+        assert len(v2_details) == len(self.v1_msg.reasoning_details)
+
+    def test_content_excludes_thinking(self):
+        assert self.v2.content == "The answer is 42."
+
+
+class TestMixed:
+    """Thinking, text and tool_use in one response: every field must carry over."""
+
+    def setup_method(self):
+        blocks = [
+            _thinking_block("Planning my approach..."),
+            _text_block("I'll run the command"),
+            _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
+        ]
+        self.resp = _response(blocks, stop_reason="tool_use")
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_all_fields_present(self):
+        for field in ("content", "tool_calls", "reasoning"):
+            assert getattr(self.v2, field) is not None
+        assert self.v2.finish_reason == "tool_calls"
+
+    def test_content_matches(self):
+        assert self.v1_msg.content == self.v2.content
+
+    def test_reasoning_matches(self):
+        assert self.v1_msg.reasoning == self.v2.reasoning
+
+    def test_tool_call_matches(self):
+        new, old = self.v2.tool_calls[0], self.v1_msg.tool_calls[0]
+        assert new.id == old.id
+        assert new.name == old.function.name
+
+
+class TestStopReasons:
+    @pytest.mark.parametrize("stop_reason,expected", [
+        ("end_turn", "stop"),
+        ("tool_use", "tool_calls"),
+        ("max_tokens", "length"),
+        ("stop_sequence", "stop"),
+        ("refusal", "content_filter"),
+        ("model_context_window_exceeded", "length"),
+        ("unknown_future_reason", "stop"),
+    ])
+    def test_stop_reason_mapping(self, stop_reason, expected):
+        """v1 and v2 must agree on the mapped reason; unknown values map to "stop"."""
+        resp = _response([_text_block("x")], stop_reason=stop_reason)
+        _, legacy_finish = normalize_anthropic_response(resp)
+        assert normalize_anthropic_response_v2(resp).finish_reason == legacy_finish == expected
+
+
+class TestStripToolPrefix:
+    """strip_tool_prefix must affect the v1 and v2 tool names in the same way."""
+
+    @staticmethod
+    def _names(strip):
+        tool = _tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})
+        resp = _response([tool], stop_reason="tool_use")
+        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=strip)
+        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=strip)
+        return v1_msg.tool_calls[0].function.name, v2.tool_calls[0].name
+
+    def test_prefix_stripped(self):
+        v1_name, v2_name = self._names(True)
+        assert v1_name == "terminal"
+        assert v2_name == "terminal"
+
+    def test_prefix_kept(self):
+        v1_name, v2_name = self._names(False)
+        assert v1_name == "mcp_terminal"
+        assert v2_name == "mcp_terminal"
+
+
+class TestEdgeCases:
+    """Boundary inputs: no content blocks, no reasoning, and the return type."""
+
+    def test_empty_content_blocks(self):
+        empty = _response([])
+        v1_msg, _ = normalize_anthropic_response(empty)
+        v2 = normalize_anthropic_response_v2(empty)
+        assert v2.content == v1_msg.content and v2.content is None
+
+    def test_no_reasoning_details_means_none_provider_data(self):
+        plain = _response([_text_block("hi")])
+        normalized = normalize_anthropic_response_v2(plain)
+        assert normalized.provider_data is None
+
+    def test_v2_returns_dataclass_not_namespace(self):
+        plain = _response([_text_block("hi")])
+        v2 = normalize_anthropic_response_v2(plain)
+        assert isinstance(v2, NormalizedResponse) and not isinstance(v2, SimpleNamespace)
--- a/tests/agent/transports/test_transport.py
+++ b/tests/agent/transports/test_transport.py
@@ -0,0 +1,208 @@
+"""Tests for the transport ABC, registry, and AnthropicTransport."""
+
+from types import SimpleNamespace
+
+import pytest
+
+from agent.transports import _REGISTRY, get_transport, register_transport
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class TestProviderTransportABC:
+    """ProviderTransport is abstract; only complete subclasses can be instantiated."""
+
+    def test_cannot_instantiate_abc(self):
+        with pytest.raises(TypeError):
+            ProviderTransport()
+
+    def test_concrete_must_implement_all_abstract(self):
+        """Overriding only api_mode still leaves the subclass abstract."""
+        class Incomplete(ProviderTransport):
+            api_mode = property(lambda self: "test")
+
+        with pytest.raises(TypeError):
+            Incomplete()
+
+    def test_minimal_concrete(self):
+        """Hooks the subclass does not define keep the base-class behaviour."""
+        class Minimal(ProviderTransport):
+            api_mode = property(lambda self: "test_minimal")
+
+            def convert_messages(self, messages, **kw):
+                return messages
+
+            def convert_tools(self, tools):
+                return tools
+
+            def build_kwargs(self, model, messages, tools=None, **params):
+                return {"model": model, "messages": messages}
+
+            def normalize_response(self, response, **kw):
+                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")
+
+        transport = Minimal()
+        assert transport.api_mode == "test_minimal"
+        assert transport.validate_response(None) is True
+        assert transport.extract_cache_stats(None) is None
+        assert transport.map_finish_reason("end_turn") == "end_turn"
+
+
+class TestTransportRegistry:  # lookups by api_mode: unknown, built-in, and custom entries
+    def test_get_unregistered_returns_none(self):
+        assert get_transport("nonexistent_mode") is None
+
+    def test_anthropic_registered_on_import(self):
+        import agent.transports.anthropic  # noqa: F401
+        t = get_transport("anthropic_messages")
+        assert t is not None
+        assert t.api_mode == "anthropic_messages"
+
+    def test_register_and_get(self):  # round-trips a throwaway transport through the registry
+        class DummyTransport(ProviderTransport):
+            @property
+            def api_mode(self):
+                return "dummy_test"
+
+            def convert_messages(self, messages, **kw):
+                return messages
+
+            def convert_tools(self, tools):
+                return tools
+
+            def build_kwargs(self, model, messages, tools=None, **params):
+                return {}
+
+            def normalize_response(self, response, **kw):
+                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")
+
+        register_transport("dummy_test", DummyTransport)
+        try:
+            assert get_transport("dummy_test").api_mode == "dummy_test"
+        finally:  # unregister even when the assertion fails, so no entry leaks into other tests
+            _REGISTRY.pop("dummy_test", None)
+
+
+class TestAnthropicTransport:  # behaviour of the transport looked up as "anthropic_messages"
+    @pytest.fixture
+    def transport(self):  # looked up from the registry for each test that requests it
+        import agent.transports.anthropic  # noqa: F401
+
+        return get_transport("anthropic_messages")
+
+    def test_api_mode(self, transport):
+        assert transport.api_mode == "anthropic_messages"
+
+    def test_convert_tools_simple(self, transport):  # function-style tool -> entry with name + input_schema
+        tools = [{
+            "type": "function",
+            "function": {
+                "name": "test_tool",
+                "description": "A test",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }]
+        result = transport.convert_tools(tools)
+        assert len(result) == 1
+        assert result[0]["name"] == "test_tool"
+        assert "input_schema" in result[0]
+
+    def test_validate_response_none(self, transport):  # None is rejected
+        assert transport.validate_response(None) is False
+
+    def test_validate_response_empty_content(self, transport):  # an empty content list is rejected
+        r = SimpleNamespace(content=[])
+        assert transport.validate_response(r) is False
+
+    def test_validate_response_valid(self, transport):  # a single text block is accepted
+        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
+        assert transport.validate_response(r) is True
+
+    def test_map_finish_reason(self, transport):  # stop_reason -> finish_reason; unknown values give "stop"
+        assert transport.map_finish_reason("end_turn") == "stop"
+        assert transport.map_finish_reason("tool_use") == "tool_calls"
+        assert transport.map_finish_reason("max_tokens") == "length"
+        assert transport.map_finish_reason("stop_sequence") == "stop"
+        assert transport.map_finish_reason("refusal") == "content_filter"
+        assert transport.map_finish_reason("model_context_window_exceeded") == "length"
+        assert transport.map_finish_reason("unknown") == "stop"
+
+    def test_extract_cache_stats_none_usage(self, transport):  # no usage object -> no stats
+        r = SimpleNamespace(usage=None)
+        assert transport.extract_cache_stats(r) is None
+
+    def test_extract_cache_stats_with_cache(self, transport):  # read -> cached_tokens, creation -> creation_tokens
+        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
+        r = SimpleNamespace(usage=usage)
+        result = transport.extract_cache_stats(r)
+        assert result == {"cached_tokens": 100, "creation_tokens": 50}
+
+    def test_extract_cache_stats_zero(self, transport):  # all-zero counters give None, not a dict of zeros
+        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
+        r = SimpleNamespace(usage=usage)
+        assert transport.extract_cache_stats(r) is None
+
+    def test_normalize_response_text(self, transport):
+        r = SimpleNamespace(
+            content=[SimpleNamespace(type="text", text="Hello world")],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert isinstance(nr, NormalizedResponse)
+        assert nr.content == "Hello world"
+        assert nr.tool_calls is None or nr.tool_calls == []  # either "no tool calls" form is accepted
+        assert nr.finish_reason == "stop"
+
+    def test_normalize_response_tool_calls(self, transport):
+        r = SimpleNamespace(
+            content=[
+                SimpleNamespace(type="tool_use", id="toolu_123", name="terminal", input={"command": "ls"}),
+            ],
+            stop_reason="tool_use",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert nr.finish_reason == "tool_calls"
+        assert len(nr.tool_calls) == 1
+        tc = nr.tool_calls[0]
+        assert tc.name == "terminal"
+        assert tc.id == "toolu_123"
+        assert '"command"' in tc.arguments  # arguments contains the quoted key (presumably JSON text; confirm)
+
+    def test_normalize_response_thinking(self, transport):  # thinking text lands in .reasoning, not .content
+        r = SimpleNamespace(
+            content=[
+                SimpleNamespace(type="thinking", thinking="Let me think..."),
+                SimpleNamespace(type="text", text="The answer is 42"),
+            ],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert nr.content == "The answer is 42"
+        assert nr.reasoning == "Let me think..."
+
+    def test_build_kwargs_returns_dict(self, transport):  # checks key presence only, not the values
+        messages = [{"role": "user", "content": "Hello"}]
+        kw = transport.build_kwargs(
+            model="claude-sonnet-4-6",
+            messages=messages,
+            max_tokens=1024,
+        )
+        assert isinstance(kw, dict)
+        assert "model" in kw
+        assert "max_tokens" in kw
+        assert "messages" in kw
+
+    def test_convert_messages_extracts_system(self, transport):
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "Hi"},
+        ]
+        system, msgs = transport.convert_messages(messages)  # result unpacks as a (system, messages) pair
+        assert system is not None
+        assert len(msgs) >= 1
--- a/tests/agent/transports/test_types.py
+++ b/tests/agent/transports/test_types.py
@@ -0,0 +1,130 @@
+"""Tests for agent/transports/types.py — dataclass construction + helpers."""
+
+import json
+
+from agent.transports.types import (
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)
+
+
+class TestToolCall:
+    """Field storage on ToolCall."""
+
+    def test_basic_construction(self):
+        fields = {"id": "call_abc", "name": "terminal", "arguments": '{"cmd": "ls"}'}
+        tc = ToolCall(**fields)
+        for attr, value in fields.items():
+            assert getattr(tc, attr) == value
+        assert tc.provider_data is None
+
+    def test_none_id(self):
+        """An id of None is accepted and stored as None."""
+        tc = ToolCall(id=None, name="read_file", arguments="{}")
+        assert tc.id is None
+
+    def test_provider_data(self):
+        """provider_data exposes the keys of the mapping it was given."""
+        extras = {"call_id": "call_x", "response_item_id": "fc_x"}
+        tc = ToolCall(id="call_x", name="t", arguments="{}", provider_data=extras)
+        assert tc.provider_data["call_id"] == "call_x"
+        assert tc.provider_data["response_item_id"] == "fc_x"
+
+
+class TestUsage:
+    """Token counters default to zero and keep explicitly passed values."""
+
+    def test_defaults(self):
+        usage = Usage()
+        for counter in ("prompt_tokens", "completion_tokens", "total_tokens", "cached_tokens"):
+            assert getattr(usage, counter) == 0
+
+    def test_explicit(self):
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80)
+        assert usage.total_tokens == 150
+
+
+class TestNormalizedResponse:
+    """Construction of NormalizedResponse with and without optional fields."""
+
+    def test_text_only(self):
+        """Fields that are not passed (reasoning, usage, provider_data) read as None."""
+        resp = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
+        assert resp.content == "hello"
+        assert resp.finish_reason == "stop"
+        for optional in ("tool_calls", "reasoning", "usage", "provider_data"):
+            assert getattr(resp, optional) is None
+
+    def test_with_tool_calls(self):
+        """Tool calls passed in are readable back from .tool_calls."""
+        call = ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')
+        resp = NormalizedResponse(content=None, tool_calls=[call], finish_reason="tool_calls")
+        assert resp.finish_reason == "tool_calls"
+        assert len(resp.tool_calls) == 1
+        assert resp.tool_calls[0].name == "terminal"
+
+    def test_with_reasoning(self):
+        base = {"content": "answer", "tool_calls": None, "finish_reason": "stop"}
+        resp = NormalizedResponse(**base, reasoning="I thought about it")
+        assert resp.reasoning == "I thought about it"
+
+    def test_with_provider_data(self):
+        """Nested provider_data content is reachable unchanged."""
+        details = [{"type": "thinking", "thinking": "hmm"}]
+        resp = NormalizedResponse(
+            content=None,
+            tool_calls=None,
+            finish_reason="stop",
+            provider_data={"reasoning_details": details},
+        )
+        assert resp.provider_data["reasoning_details"][0]["type"] == "thinking"
+
+
+class TestBuildToolCall:
+    """build_tool_call: argument serialization and provider-specific extras."""
+
+    def test_dict_arguments_serialized(self):
+        """Dict arguments come back as the json.dumps text of that dict."""
+        tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
+        assert tc.arguments == json.dumps({"cmd": "ls"})
+        assert tc.provider_data is None
+
+    def test_string_arguments_passthrough(self):
+        raw = '{"path": "/tmp"}'
+        tc = build_tool_call(id="call_2", name="read_file", arguments=raw)
+        assert tc.arguments == raw
+
+    def test_provider_fields(self):
+        """Extra keyword fields end up in provider_data."""
+        extras = {"call_id": "call_3", "response_item_id": "fc_3"}
+        tc = build_tool_call(id="call_3", name="terminal", arguments="{}", **extras)
+        assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}
+
+    def test_none_id(self):
+        tc = build_tool_call(id=None, name="t", arguments="{}")
+        assert tc.id is None
+
+
+class TestMapFinishReason:
+    """map_finish_reason against a local copy of an Anthropic-style mapping."""
+
+    ANTHROPIC_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+    }
+
+    def test_known_reason(self):
+        for reason in ("end_turn", "tool_use", "max_tokens", "refusal"):
+            assert map_finish_reason(reason, self.ANTHROPIC_MAP) == self.ANTHROPIC_MAP[reason]
+
+    def test_unknown_reason_defaults_to_stop(self):
+        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"
+
+    def test_none_reason(self):
+        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
--- a/tests/hermes_cli/test_a2a_cmd.py
+++ b/tests/hermes_cli/test_a2a_cmd.py
@@ -1,95 +0,0 @@
-from __future__ import annotations
-
-import argparse
-import json
-from pathlib import Path
-from unittest.mock import patch
-
-import pytest
-
-
-def test_cmd_send_uses_registry_and_waits_for_terminal_task(tmp_path, monkeypatch, capsys):
-    hermes_home = tmp_path / ".hermes"
-    hermes_home.mkdir()
-    (hermes_home / "a2a_agents.json").write_text(
-        json.dumps({"allegro": {"url": "https://127.0.0.1:9443"}}),
-        encoding="utf-8",
-    )
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-    from hermes_cli.a2a_cmd import cmd_a2a
-
-    class FakeClient:
-        def __init__(self, **kwargs):
-            self.kwargs = kwargs
-
-        def discover_card(self, base_url: str):
-            assert base_url == "https://127.0.0.1:9443"
-            return {"name": "allegro", "url": base_url}
-
-        def send_task(self, base_url: str, *, task: str, requester: str | None = None, metadata=None):
-            assert task == "analyze README"
-            return {"taskId": "task-123", "status": {"state": "submitted"}}
-
-        def wait_for_task(self, base_url: str, task_id: str, *, timeout: float, poll_interval: float):
-            assert task_id == "task-123"
-            return {
-                "taskId": task_id,
-                "status": {"state": "completed"},
-                "artifacts": [{"text": "README looks healthy"}],
-            }
-
-    args = argparse.Namespace(
-        a2a_command="send",
-        agent="allegro",
-        task="analyze README",
-        url=None,
-        wait=True,
-        timeout=5.0,
-        poll_interval=0.01,
-        requester="timmy",
-        cert="cert.pem",
-        key="key.pem",
-        ca="ca.pem",
-    )
-
-    with patch("hermes_cli.a2a_cmd.A2ATaskClient", FakeClient):
-        cmd_a2a(args)
-
-    result = json.loads(capsys.readouterr().out)
-    assert result["agent"] == "allegro"
-    assert result["card"]["name"] == "allegro"
-    assert result["task"]["status"]["state"] == "completed"
-    assert result["task"]["artifacts"][0]["text"] == "README looks healthy"
-
-
-def test_resolve_agent_url_supports_env_override(monkeypatch):
-    monkeypatch.setenv("HERMES_A2A_ALLEGRO_URL", "https://fleet-allegro:9443")
-    from hermes_cli.a2a_cmd import resolve_agent_url
-
-    assert resolve_agent_url("allegro") == "https://fleet-allegro:9443"
-
-
-def test_cmd_send_requires_known_agent(tmp_path, monkeypatch):
-    hermes_home = tmp_path / ".hermes"
-    hermes_home.mkdir()
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-    from hermes_cli.a2a_cmd import cmd_a2a
-
-    args = argparse.Namespace(
-        a2a_command="send",
-        agent="unknown",
-        task="do work",
-        url=None,
-        wait=False,
-        timeout=5.0,
-        poll_interval=0.05,
-        requester=None,
-        cert="cert.pem",
-        key="key.pem",
-        ca="ca.pem",
-    )
-
-    with pytest.raises(SystemExit):
-        cmd_a2a(args)