feat: add Anthropic transport abstraction slice (#951)

- add transport registry, shared transport dataclasses, and AnthropicTransport
- add normalize_anthropic_response_v2 as the bridge from existing Anthropic normalization to shared transport types
- extend Anthropic stop-reason mapping for refusal and model_context_window_exceeded
- add targeted transport and v2 normalization regression tests

Closes #951
Refs #949
2026-04-22 11:20:20 -04:00
12 changed files with 863 additions and 1727 deletions
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -1396,6 +1396,8 @@ def normalize_anthropic_response(
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
    }
    finish_reason = stop_reason_map.get(response.stop_reason, "stop")

@@ -1409,3 +1411,42 @@ def normalize_anthropic_response(
        ),
        finish_reason,
    )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Return an Anthropic response as a shared ``NormalizedResponse``.
+
+    Thin bridge over the legacy ``normalize_anthropic_response()``: that
+    function does the parsing, and this wrapper only re-packs its
+    ``(assistant message, finish reason)`` result into the transport
+    dataclasses so legacy call sites can stay as they are during migration.
+
+    ``usage`` is always ``None`` in the result. ``reasoning_details``, when
+    the legacy message carries a truthy value, is passed through under
+    ``provider_data``; otherwise ``provider_data`` is ``None``.
+    """
+    # Function-scope import: the transport types are only loaded when the
+    # v2 path is actually called.
+    from agent.transports.types import NormalizedResponse, build_tool_call
+
+    legacy_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    # A missing or empty legacy tool-call list is reported as None.
+    converted_calls = None
+    if legacy_msg.tool_calls:
+        converted_calls = []
+        for legacy_call in legacy_msg.tool_calls:
+            fn = legacy_call.function
+            converted_calls.append(
+                build_tool_call(id=legacy_call.id, name=fn.name, arguments=fn.arguments)
+            )
+
+    details = getattr(legacy_msg, "reasoning_details", None)
+    extras = {"reasoning_details": details} if details else None
+
+    return NormalizedResponse(
+        content=legacy_msg.content,
+        tool_calls=converted_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(legacy_msg, "reasoning", None),
+        usage=None,
+        provider_data=extras,
+    )
--- a/agent/transports/__init__.py
+++ b/agent/transports/__init__.py
@@ -0,0 +1,57 @@
+"""Transport layer types and registry for provider response normalization.
+
+Usage:
+    from agent.transports import get_transport
+    transport = get_transport("anthropic_messages")
+    result = transport.normalize_response(raw_response)
+"""
+
+from agent.transports.types import (  # noqa: F401
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)
+
+_REGISTRY: dict = {}
+
+# True once _discover_transports() has run. Tracked separately from
+# _REGISTRY so that an earlier manual register_transport() call cannot
+# suppress discovery of the built-in transports.
+_DISCOVERED: bool = False
+
+# Modules that register a transport as a side effect of being imported.
+_TRANSPORT_MODULES = (
+    "agent.transports.anthropic",
+    "agent.transports.codex",
+    "agent.transports.chat_completions",
+    "agent.transports.bedrock",
+)
+
+
+def register_transport(api_mode: str, transport_cls: type) -> None:
+    """Register a transport class for an api_mode string.
+
+    A later registration for the same ``api_mode`` replaces the earlier one.
+    """
+    _REGISTRY[api_mode] = transport_cls
+
+
+def get_transport(api_mode: str):
+    """Get a transport instance for the given api_mode.
+
+    The first call triggers a one-time import of the known transport
+    modules. A new instance of the registered class is created on every call.
+
+    Returns None if no transport is registered for this api_mode.
+    This allows gradual migration — call sites can check for None
+    and fall back to the legacy code path.
+    """
+    if not _DISCOVERED:
+        _discover_transports()
+    cls = _REGISTRY.get(api_mode)
+    if cls is None:
+        return None
+    return cls()
+
+
+def _discover_transports() -> None:
+    """Import all transport modules to trigger auto-registration.
+
+    A transport module that does not exist is skipped silently. Any other
+    import failure (for example a broken dependency inside a module that
+    does exist) is logged as a warning instead of being swallowed, and
+    discovery continues with the remaining modules.
+    """
+    global _DISCOVERED
+    import importlib
+    import logging
+
+    # Mark first so discovery is attempted only once per process.
+    _DISCOVERED = True
+    log = logging.getLogger(__name__)
+    for module_name in _TRANSPORT_MODULES:
+        try:
+            importlib.import_module(module_name)
+        except ModuleNotFoundError as exc:
+            missing = exc.name or ""
+            # The transport itself (or one of its parent packages) is absent:
+            # that is the expected "not available" case.
+            if missing and (module_name == missing or module_name.startswith(missing + ".")):
+                continue
+            log.warning("Transport %s failed to import: %s", module_name, exc)
+        except ImportError as exc:
+            log.warning("Transport %s failed to import: %s", module_name, exc)
--- a/agent/transports/anthropic.py
+++ b/agent/transports/anthropic.py
@@ -0,0 +1,95 @@
+"""Anthropic Messages API transport.
+
+Delegates to the existing adapter functions in agent/anthropic_adapter.py.
+This transport owns format conversion and normalization — NOT client lifecycle.
+"""
+
+from typing import Any, Dict, List, Optional
+
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class AnthropicTransport(ProviderTransport):
+    """Provider transport for ``api_mode='anthropic_messages'``.
+
+    Each conversion method forwards to the matching helper in
+    ``agent.anthropic_adapter``; the adapter is imported inside the method,
+    at call time, rather than at module import.
+    """
+
+    # Anthropic ``stop_reason`` -> normalized finish reason. Values that are
+    # not listed fall back to "stop" in map_finish_reason().
+    _STOP_REASON_MAP = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+
+    @property
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles."""
+        return "anthropic_messages"
+
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert messages via the adapter; only ``base_url`` is read from kwargs."""
+        from agent.anthropic_adapter import convert_messages_to_anthropic
+
+        return convert_messages_to_anthropic(messages, base_url=kwargs.get("base_url"))
+
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert tool definitions via the adapter."""
+        from agent.anthropic_adapter import convert_tools_to_anthropic
+
+        converted = convert_tools_to_anthropic(tools)
+        return converted
+
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the adapter's request kwargs.
+
+        Recognised ``params`` keys and their fallbacks when absent:
+        ``max_tokens`` (16384), ``reasoning_config`` (None), ``tool_choice``
+        (None), ``is_oauth`` (False), ``preserve_dots`` (False),
+        ``context_length`` (None), ``base_url`` (None), ``fast_mode`` (False).
+        """
+        from agent.anthropic_adapter import build_anthropic_kwargs
+
+        option = params.get
+        return build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=option("max_tokens", 16384),
+            reasoning_config=option("reasoning_config"),
+            tool_choice=option("tool_choice"),
+            is_oauth=option("is_oauth", False),
+            preserve_dots=option("preserve_dots", False),
+            context_length=option("context_length"),
+            base_url=option("base_url"),
+            fast_mode=option("fast_mode", False),
+        )
+
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw response; ``strip_tool_prefix`` (default False) is read from kwargs."""
+        from agent.anthropic_adapter import normalize_anthropic_response_v2
+
+        return normalize_anthropic_response_v2(
+            response,
+            strip_tool_prefix=kwargs.get("strip_tool_prefix", False),
+        )
+
+    def validate_response(self, response: Any) -> bool:
+        """Return True only when ``response.content`` is a non-empty list."""
+        if response is None:
+            return False
+        blocks = getattr(response, "content", None)
+        return isinstance(blocks, list) and len(blocks) > 0
+
+    def extract_cache_stats(self, response: Any):
+        """Return cache read/creation token counts, or None when both are zero or usage is absent."""
+        usage = getattr(response, "usage", None)
+        if usage is None:
+            return None
+        stats = {
+            "cached_tokens": getattr(usage, "cache_read_input_tokens", 0) or 0,
+            "creation_tokens": getattr(usage, "cache_creation_input_tokens", 0) or 0,
+        }
+        if stats["cached_tokens"] or stats["creation_tokens"]:
+            return stats
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Map an Anthropic stop reason to the normalized set, defaulting to "stop"."""
+        try:
+            return self._STOP_REASON_MAP[raw_reason]
+        except KeyError:
+            return "stop"
+
+
+# Self-registration: importing this module adds AnthropicTransport to the
+# registry under "anthropic_messages". The import is placed at the bottom of
+# the module, which is why E402 is suppressed.
+from agent.transports import register_transport  # noqa: E402
+
+register_transport("anthropic_messages", AnthropicTransport)
--- a/agent/transports/base.py
+++ b/agent/transports/base.py
@@ -0,0 +1,61 @@
+"""Abstract base for provider transports.
+
+A transport owns the data path for one api_mode:
+  convert_messages → convert_tools → build_kwargs → normalize_response
+
+It does NOT own: client construction, streaming, credential refresh,
+prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+from agent.transports.types import NormalizedResponse
+
+
+class ProviderTransport(ABC):
+    """Base class for provider-specific format conversion and normalization.
+
+    Subclasses must implement ``api_mode``, ``convert_messages``,
+    ``convert_tools``, ``build_kwargs`` and ``normalize_response``. The
+    remaining methods are optional hooks with neutral defaults.
+    """
+
+    @property
+    @abstractmethod
+    def api_mode(self) -> str:
+        """The api_mode string this transport handles."""
+        ...
+
+    @abstractmethod
+    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
+        """Convert OpenAI-format messages to provider-native format.
+
+        Extra keyword arguments are provider-specific options.
+        """
+        ...
+
+    @abstractmethod
+    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
+        """Convert OpenAI-format tool definitions to provider-native format."""
+        ...
+
+    @abstractmethod
+    def build_kwargs(
+        self,
+        model: str,
+        messages: List[Dict[str, Any]],
+        tools: Optional[List[Dict[str, Any]]] = None,
+        **params,
+    ) -> Dict[str, Any]:
+        """Build the complete provider kwargs dict.
+
+        ``params`` carries provider-specific options; which keys are
+        honoured is up to the concrete transport.
+        """
+        ...
+
+    @abstractmethod
+    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
+        """Normalize a raw provider response to the shared NormalizedResponse type."""
+        ...
+
+    def validate_response(self, response: Any) -> bool:
+        """Optional structural validation for raw responses.
+
+        The default accepts every response, including None.
+        """
+        return True
+
+    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+        """Optional cache stats extraction.
+
+        The default reports no cache statistics (returns None).
+        """
+        return None
+
+    def map_finish_reason(self, raw_reason: str) -> str:
+        """Optional stop-reason mapping. Defaults to passthrough.
+
+        The default returns ``raw_reason`` unchanged.
+        """
+        return raw_reason
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@@ -0,0 +1,58 @@
+"""Shared types for normalized provider responses."""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional
+
+
+@dataclass
+class ToolCall:
+    """A normalized tool call from any provider."""
+
+    # Call identifier; may be None.
+    id: Optional[str]
+    # Name of the tool being invoked.
+    name: str
+    # Arguments as a string (build_tool_call() JSON-encodes dict arguments).
+    arguments: str
+    # Optional provider-specific extras; left out of repr().
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+@dataclass
+class Usage:
+    """Token usage from an API response.
+
+    Every counter defaults to 0.
+    """
+
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+    total_tokens: int = 0
+    cached_tokens: int = 0
+
+
+@dataclass
+class NormalizedResponse:
+    """Normalized API response from any provider."""
+
+    # Assistant text; may be None.
+    content: Optional[str]
+    # Normalized tool calls; may be None.
+    tool_calls: Optional[List[ToolCall]]
+    # Normalized finish reason string (e.g. "stop", "tool_calls", "length").
+    finish_reason: str
+    # Reasoning text, when available.
+    reasoning: Optional[str] = None
+    # Token usage, when available.
+    usage: Optional[Usage] = None
+    # Optional provider-specific extras; left out of repr().
+    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+
+
+def build_tool_call(
+    id: Optional[str],
+    name: str,
+    arguments: Any,
+    **provider_fields: Any,
+) -> ToolCall:
+    """Build a ToolCall, serialising structured arguments to JSON.
+
+    Args:
+        id: Call identifier, or None.
+        name: Tool name.
+        arguments: A string is used as-is. A dict, list or tuple is
+            JSON-encoded, and ``json.dumps`` errors propagate. ``None``
+            becomes ``"{}"``. Anything else is converted with ``str()``.
+        **provider_fields: Extra keyword arguments stored as ``provider_data``.
+
+    Returns:
+        The ToolCall; ``provider_data`` is None when no extra fields are given.
+    """
+    if isinstance(arguments, str):
+        args_str = arguments
+    elif arguments is None:
+        # str(None) would yield the literal "None", which is not valid JSON.
+        args_str = "{}"
+    elif isinstance(arguments, (dict, list, tuple)):
+        # str() on a list/tuple gives a Python repr, not JSON.
+        args_str = json.dumps(arguments)
+    else:
+        args_str = str(arguments)
+    provider_data = dict(provider_fields) if provider_fields else None
+    return ToolCall(id=id, name=name, arguments=args_str, provider_data=provider_data)
+
+
+def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+    """Look up a provider stop reason in ``mapping``.
+
+    Returns "stop" when ``reason`` is None or has no entry in ``mapping``.
+    """
+    return "stop" if reason is None else mapping.get(reason, "stop")
--- a/docs/issue-954-verification.md
+++ b/docs/issue-954-verification.md
@@ -1,100 +0,0 @@
-# Issue #954 Verification — maps skill guest_house / camp_site / bakery
-
-Status: PASS
-
-## Drift noted
-
-Issue #954 asked for validation on `upstream/main` (commit `c5a814b23`).
-Fresh `forge/main` did not contain `skills/productivity/maps/`, so the forge branch was behind upstream for this feature cluster.
-This branch ports the upstream maps skill files into the forge checkout and adds regression coverage.
-
-## Automated verification
-
-Command:
-
-```bash
-pytest -q tests/skills/test_maps_client.py
-```
-
-Result:
-
- 5 passed
-
-Coverage added:
-
- maps skill files exist in the repo
- `guest_house` category maps to `tourism=guest_house`
- `camp_site` category maps to `tourism=camp_site`
- `bakery` expands to both `shop=bakery` and `amenity=bakery`
- dual-key bakery results dedupe correctly
- skill documentation lists the new categories and supersedes `find-nearby`
-
-## Manual evidence
-
-### 1) guest_house lookup
-
-Command:
-
-```bash
-python3 skills/productivity/maps/scripts/maps_client.py nearby --near "Bath, United Kingdom" --category guest_house --limit 3
-```
-
-Observed results:
-
- Henrietta House — 390.3 m
- The Windsor — 437.2 m
- The Old Rectory Bed & Breakfast — 495.7 m
-
-All returned `tourism=guest_house` in the raw tags.
-
-### 2) camp_site lookup
-
-Command:
-
-```bash
-python3 skills/productivity/maps/scripts/maps_client.py nearby --near "Yosemite Valley, California" --category camp_site --limit 5
-```
-
-Observed result:
-
- Yellow Pine Administrative Campground — 90.3 m
-
-Returned `tourism=camp_site` in the raw tags.
-
-### 3) bakery lookup via `shop=bakery`
-
-Command:
-
-```bash
-python3 skills/productivity/maps/scripts/maps_client.py nearby --near "Lawrenceville, New Jersey" --category bakery --radius 5000 --limit 10
-```
-
-Observed results:
-
- The Gingered Peach — 713.8 m
- WildFlour Bakery — 741.9 m
-
-Both returned `shop=bakery` in the raw tags.
-
-### 4) bakery lookup via `amenity=bakery`
-
-Command:
-
-```bash
-python3 skills/productivity/maps/scripts/maps_client.py nearby --near "20735 Stevens Creek Boulevard, Cupertino, CA" --category bakery --radius 600 --limit 5
-```
-
-Observed result:
-
- Paris Baguette — 28.6 m
-
-Returned `amenity=bakery` in the raw tags (and also includes `shop=bakery`), proving the dual-key union query reaches amenity-tagged bakeries too.
-
-## Conclusion
-
-PASS.
-
- `guest_house` resolves correctly
- `camp_site` resolves correctly
- `bakery` resolves through both supported keys
- forge/main drift from upstream/main was real and is addressed on this branch
--- a/skills/productivity/maps/SKILL.md
+++ b/skills/productivity/maps/SKILL.md
@@ -1,199 +0,0 @@
---
-name: maps
-description: >
-  Location intelligence — geocode a place, reverse-geocode coordinates,
-  find nearby places (46 POI categories), driving/walking/cycling
-  distance + time, turn-by-turn directions, timezone lookup, bounding
-  box + area for a named place, and POI search within a rectangle.
-  Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
-version: 1.2.0
-author: Mibayy
-license: MIT
-metadata:
-  hermes:
-    tags: [maps, geocoding, places, routing, distance, directions, nearby, location, openstreetmap, nominatim, overpass, osrm]
-    category: productivity
-    requires_toolsets: [terminal]
-    supersedes: [find-nearby]
---
-
-# Maps Skill
-
-Location intelligence using free, open data sources. 8 commands, 44 POI
-categories, zero dependencies (Python stdlib only), no API key required.
-
-Data sources: OpenStreetMap/Nominatim, Overpass API, OSRM, TimeAPI.io.
-
-This skill supersedes the old `find-nearby` skill — all of find-nearby's
-functionality is covered by the `nearby` command below, with the same
-`--near "<place>"` shortcut and multi-category support.
-
-## When to Use
-
- User sends a Telegram location pin (latitude/longitude in the message) → `nearby`
- User wants coordinates for a place name → `search`
- User has coordinates and wants the address → `reverse`
- User asks for nearby restaurants, hospitals, pharmacies, hotels, etc. → `nearby`
- User wants driving/walking/cycling distance or travel time → `distance`
- User wants turn-by-turn directions between two places → `directions`
- User wants timezone information for a location → `timezone`
- User wants to search for POIs within a geographic area → `area` + `bbox`
-
-## Prerequisites
-
-Python 3.8+ (stdlib only — no pip installs needed).
-
-Script path: `~/.hermes/skills/maps/scripts/maps_client.py`
-
-## Commands
-
-```bash
-MAPS=~/.hermes/skills/maps/scripts/maps_client.py
-```
-
-### search — Geocode a place name
-
-```bash
-python3 $MAPS search "Eiffel Tower"
-python3 $MAPS search "1600 Pennsylvania Ave, Washington DC"
-```
-
-Returns: lat, lon, display name, type, bounding box, importance score.
-
-### reverse — Coordinates to address
-
-```bash
-python3 $MAPS reverse 48.8584 2.2945
-```
-
-Returns: full address breakdown (street, city, state, country, postcode).
-
-### nearby — Find places by category
-
-```bash
-# By coordinates (from a Telegram location pin, for example)
-python3 $MAPS nearby 48.8584 2.2945 restaurant --limit 10
-python3 $MAPS nearby 40.7128 -74.0060 hospital --radius 2000
-
-# By address / city / zip / landmark — --near auto-geocodes
-python3 $MAPS nearby --near "Times Square, New York" --category cafe
-python3 $MAPS nearby --near "90210" --category pharmacy
-
-# Multiple categories merged into one query
-python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
-```
-
-46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house,
-camp_site, supermarket, atm, gas_station, parking, museum, park, school,
-university, bank, police, fire_station, library, airport, train_station,
-bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym,
-swimming_pool, post_office, convenience_store, bakery, bookshop, laundry,
-car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground,
-stadium, nightclub.
-
-Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
-`maps_url` (clickable Google Maps link), `directions_url` (Google Maps
-directions from the search point), and promoted tags when available —
-`cuisine`, `hours` (opening_hours), `phone`, `website`.
-
-### distance — Travel distance and time
-
-```bash
-python3 $MAPS distance "Paris" --to "Lyon"
-python3 $MAPS distance "New York" --to "Boston" --mode driving
-python3 $MAPS distance "Big Ben" --to "Tower Bridge" --mode walking
-```
-
-Modes: driving (default), walking, cycling. Returns road distance, duration,
-and straight-line distance for comparison.
-
-### directions — Turn-by-turn navigation
-
-```bash
-python3 $MAPS directions "Eiffel Tower" --to "Louvre Museum" --mode walking
-python3 $MAPS directions "JFK Airport" --to "Times Square" --mode driving
-```
-
-Returns numbered steps with instruction, distance, duration, road name, and
-maneuver type (turn, depart, arrive, etc.).
-
-### timezone — Timezone for coordinates
-
-```bash
-python3 $MAPS timezone 48.8584 2.2945
-python3 $MAPS timezone 35.6762 139.6503
-```
-
-Returns timezone name, UTC offset, and current local time.
-
-### area — Bounding box and area for a place
-
-```bash
-python3 $MAPS area "Manhattan, New York"
-python3 $MAPS area "London"
-```
-
-Returns bounding box coordinates, width/height in km, and approximate area.
-Useful as input for the bbox command.
-
-### bbox — Search within a bounding box
-
-```bash
-python3 $MAPS bbox 40.75 -74.00 40.77 -73.98 restaurant --limit 20
-```
-
-Finds POIs within a geographic rectangle. Use `area` first to get the
-bounding box coordinates for a named place.
-
-## Working With Telegram Location Pins
-
-When a user sends a location pin, the message contains `latitude:` and
-`longitude:` fields. Extract those and pass them straight to `nearby`:
-
-```bash
-# User sent a pin at 36.17, -115.14 and asked "find cafes nearby"
-python3 $MAPS nearby 36.17 -115.14 cafe --radius 1500
-```
-
-Present results as a numbered list with names, distances, and the
-`maps_url` field so the user gets a tap-to-open link in chat. For "open
-now?" questions, check the `hours` field; if missing or unclear, verify
-with `web_search` since OSM hours are community-maintained and not always
-current.
-
-## Workflow Examples
-
-**"Find Italian restaurants near the Colosseum":**
-1. `nearby --near "Colosseum Rome" --category restaurant --radius 500`
-   — one command, auto-geocoded
-
-**"What's near this location pin they sent?":**
-1. Extract lat/lon from the Telegram message
-2. `nearby LAT LON cafe --radius 1500`
-
-**"How do I walk from hotel to conference center?":**
-1. `directions "Hotel Name" --to "Conference Center" --mode walking`
-
-**"What restaurants are in downtown Seattle?":**
-1. `area "Downtown Seattle"` → get bounding box
-2. `bbox S W N E restaurant --limit 30`
-
-## Pitfalls
-
- Nominatim ToS: max 1 req/s (handled automatically by the script)
- `nearby` requires lat/lon OR `--near "<address>"` — one of the two is needed
- OSRM routing coverage is best for Europe and North America
- Overpass API can be slow during peak hours; the script automatically
-  falls back between mirrors (overpass-api.de → overpass.kumi.systems)
- `distance` and `directions` use `--to` flag for the destination (not positional)
- If a zip code alone gives ambiguous results globally, include country/state
-
-## Verification
-
-```bash
-python3 ~/.hermes/skills/maps/scripts/maps_client.py search "Statue of Liberty"
-# Should return lat ~40.689, lon ~-74.044
-
-python3 ~/.hermes/skills/maps/scripts/maps_client.py nearby --near "Times Square" --category restaurant --limit 3
-# Should return a list of restaurants within ~500m of Times Square
-```
--- a/skills/productivity/maps/scripts/maps_client.py
+++ b/skills/productivity/maps/scripts/maps_client.py
--- a/tests/agent/test_anthropic_normalize_v2.py
+++ b/tests/agent/test_anthropic_normalize_v2.py
@@ -0,0 +1,213 @@
+"""Regression tests: normalize_anthropic_response_v2 vs v1.
+
+Constructs mock Anthropic responses and asserts that the v2 function
+(returning NormalizedResponse) produces identical field values to the
+original v1 function (returning SimpleNamespace + finish_reason).
+"""
+
+from types import SimpleNamespace
+
+import pytest
+
+from agent.anthropic_adapter import (
+    normalize_anthropic_response,
+    normalize_anthropic_response_v2,
+)
+from agent.transports.types import NormalizedResponse
+
+
+def _text_block(text: str):
+    """Build a stand-in for an Anthropic ``text`` content block."""
+    block = SimpleNamespace(type="text")
+    block.text = text
+    return block
+
+
+def _thinking_block(thinking: str, signature: str = "sig_abc"):
+    """Build a stand-in for an Anthropic ``thinking`` content block."""
+    fields = {"type": "thinking", "thinking": thinking, "signature": signature}
+    return SimpleNamespace(**fields)
+
+
+def _tool_use_block(id: str, name: str, input: dict):
+    """Build a stand-in for an Anthropic ``tool_use`` content block."""
+    fields = {"type": "tool_use", "id": id, "name": name, "input": input}
+    return SimpleNamespace(**fields)
+
+
+def _response(content_blocks, stop_reason="end_turn"):
+    """Build a stand-in Anthropic response with fixed usage (10 input / 5 output tokens)."""
+    fake = SimpleNamespace(content=content_blocks, stop_reason=stop_reason)
+    fake.usage = SimpleNamespace(input_tokens=10, output_tokens=5)
+    return fake
+
+
+class TestTextOnly:
+    """v1/v2 parity for a response holding a single text block."""
+
+    def setup_method(self):
+        # Normalize the same mock response through both code paths.
+        self.resp = _response([_text_block("Hello world")])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_type(self):
+        assert isinstance(self.v2, NormalizedResponse)
+
+    def test_content_matches(self):
+        assert self.v2.content == self.v1_msg.content
+
+    def test_finish_reason_matches(self):
+        assert self.v2.finish_reason == self.v1_finish
+
+    def test_no_tool_calls(self):
+        # Absence of tool calls is represented as None on both paths.
+        assert self.v2.tool_calls is None
+        assert self.v1_msg.tool_calls is None
+
+    def test_no_reasoning(self):
+        assert self.v2.reasoning is None
+        assert self.v1_msg.reasoning is None
+
+
+class TestWithToolCalls:
+    """v1/v2 parity for a response with text followed by two tool_use blocks."""
+
+    def setup_method(self):
+        # Normalize the same mock response through both code paths.
+        self.resp = _response(
+            [
+                _text_block("I'll check that"),
+                _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
+                _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
+            ],
+            stop_reason="tool_use",
+        )
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_finish_reason(self):
+        assert self.v2.finish_reason == "tool_calls"
+        assert self.v1_finish == "tool_calls"
+
+    def test_tool_call_count(self):
+        assert len(self.v2.tool_calls) == 2
+        assert len(self.v1_msg.tool_calls) == 2
+
+    def test_tool_call_ids_match(self):
+        for i in range(2):
+            assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
+
+    def test_tool_call_names_match(self):
+        # v2 exposes the name directly; v1 nests it under ``.function``.
+        assert self.v2.tool_calls[0].name == "terminal"
+        assert self.v2.tool_calls[1].name == "read_file"
+        for i in range(2):
+            assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
+
+    def test_tool_call_arguments_match(self):
+        for i in range(2):
+            assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
+
+    def test_content_preserved(self):
+        assert self.v2.content == self.v1_msg.content
+        assert "check that" in self.v2.content
+
+
+class TestWithThinking:
+    """v1/v2 parity for a response with a thinking block followed by text."""
+
+    def setup_method(self):
+        # Normalize the same mock response through both code paths.
+        self.resp = _response([
+            _thinking_block("Let me think about this carefully..."),
+            _text_block("The answer is 42."),
+        ])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_reasoning_matches(self):
+        assert self.v2.reasoning == self.v1_msg.reasoning
+        assert "think about this" in self.v2.reasoning
+
+    def test_reasoning_details_in_provider_data(self):
+        # v2 carries reasoning_details inside provider_data rather than as
+        # a top-level attribute.
+        v1_details = self.v1_msg.reasoning_details
+        v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
+        assert v1_details is not None
+        assert v2_details is not None
+        assert len(v2_details) == len(v1_details)
+
+    def test_content_excludes_thinking(self):
+        assert self.v2.content == "The answer is 42."
+
+
+class TestMixed:
+    """v1/v2 parity for a response combining thinking, text and one tool call."""
+
+    def setup_method(self):
+        # Normalize the same mock response through both code paths.
+        self.resp = _response(
+            [
+                _thinking_block("Planning my approach..."),
+                _text_block("I'll run the command"),
+                _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
+            ],
+            stop_reason="tool_use",
+        )
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_all_fields_present(self):
+        assert self.v2.content is not None
+        assert self.v2.tool_calls is not None
+        assert self.v2.reasoning is not None
+        assert self.v2.finish_reason == "tool_calls"
+
+    def test_content_matches(self):
+        assert self.v2.content == self.v1_msg.content
+
+    def test_reasoning_matches(self):
+        assert self.v2.reasoning == self.v1_msg.reasoning
+
+    def test_tool_call_matches(self):
+        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
+        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
+
+
+class TestStopReasons:
+    """Stop-reason mapping is identical on the v1 and v2 paths."""
+
+    # The last case checks that an unrecognised stop reason maps to "stop".
+    @pytest.mark.parametrize("stop_reason,expected", [
+        ("end_turn", "stop"),
+        ("tool_use", "tool_calls"),
+        ("max_tokens", "length"),
+        ("stop_sequence", "stop"),
+        ("refusal", "content_filter"),
+        ("model_context_window_exceeded", "length"),
+        ("unknown_future_reason", "stop"),
+    ])
+    def test_stop_reason_mapping(self, stop_reason, expected):
+        resp = _response([_text_block("x")], stop_reason=stop_reason)
+        _v1_msg, v1_finish = normalize_anthropic_response(resp)
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.finish_reason == v1_finish == expected
+
+
+class TestStripToolPrefix:
+    """``strip_tool_prefix`` is honoured identically by v1 and v2."""
+
+    def test_prefix_stripped(self):
+        # With the flag on, "mcp_terminal" is reported as "terminal".
+        resp = _response(
+            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
+            stop_reason="tool_use",
+        )
+        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
+        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
+        assert v1_msg.tool_calls[0].function.name == "terminal"
+        assert v2.tool_calls[0].name == "terminal"
+
+    def test_prefix_kept(self):
+        # With the flag off, the tool name is passed through unchanged.
+        resp = _response(
+            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
+            stop_reason="tool_use",
+        )
+        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
+        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
+        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
+        assert v2.tool_calls[0].name == "mcp_terminal"
+
+
+class TestEdgeCases:
+    """Boundary behaviour of the v2 normalizer."""
+
+    def test_empty_content_blocks(self):
+        # No content blocks: content is None on both paths.
+        resp = _response([])
+        v1_msg, _v1_finish = normalize_anthropic_response(resp)
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.content == v1_msg.content
+        assert v2.content is None
+
+    def test_no_reasoning_details_means_none_provider_data(self):
+        # provider_data collapses to None rather than an empty dict.
+        resp = _response([_text_block("hi")])
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.provider_data is None
+
+    def test_v2_returns_dataclass_not_namespace(self):
+        resp = _response([_text_block("hi")])
+        v2 = normalize_anthropic_response_v2(resp)
+        assert isinstance(v2, NormalizedResponse)
+        assert not isinstance(v2, SimpleNamespace)
--- a/tests/agent/transports/test_transport.py
+++ b/tests/agent/transports/test_transport.py
@@ -0,0 +1,208 @@
+"""Tests for the transport ABC, registry, and AnthropicTransport."""
+
+from types import SimpleNamespace
+
+import pytest
+
+from agent.transports import _REGISTRY, get_transport, register_transport
+from agent.transports.base import ProviderTransport
+from agent.transports.types import NormalizedResponse
+
+
+class TestProviderTransportABC:  # abstract-method enforcement and inherited defaults of ProviderTransport
+    def test_cannot_instantiate_abc(self):  # instantiating the base class directly raises TypeError
+        with pytest.raises(TypeError):
+            ProviderTransport()
+
+    def test_concrete_must_implement_all_abstract(self):  # defining api_mode alone still raises TypeError
+        class Incomplete(ProviderTransport):
+            @property
+            def api_mode(self):
+                return "test"
+
+        with pytest.raises(TypeError):
+            Incomplete()
+
+    def test_minimal_concrete(self):  # a subclass defining these five members can be instantiated
+        class Minimal(ProviderTransport):
+            @property
+            def api_mode(self):
+                return "test_minimal"
+
+            def convert_messages(self, messages, **kw):
+                return messages
+
+            def convert_tools(self, tools):
+                return tools
+
+            def build_kwargs(self, model, messages, tools=None, **params):
+                return {"model": model, "messages": messages}
+
+            def normalize_response(self, response, **kw):
+                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")
+
+        t = Minimal()
+        assert t.api_mode == "test_minimal"
+        assert t.validate_response(None) is True  # not overridden by Minimal: inherited behaviour
+        assert t.extract_cache_stats(None) is None  # not overridden by Minimal: inherited behaviour
+        assert t.map_finish_reason("end_turn") == "end_turn"  # inherited version returns the input unchanged here
+
+
+class TestTransportRegistry:
+    def test_get_unregistered_returns_none(self):
+        assert get_transport("nonexistent_mode") is None
+
+    def test_anthropic_registered_on_import(self):
+        import agent.transports.anthropic  # noqa: F401
+
+        t = get_transport("anthropic_messages")
+        assert t is not None
+        assert t.api_mode == "anthropic_messages"
+
+    def test_register_and_get(self):
+        class DummyTransport(ProviderTransport):
+            @property
+            def api_mode(self):
+                return "dummy_test"
+
+            def convert_messages(self, messages, **kw):
+                return messages
+
+            def convert_tools(self, tools):
+                return tools
+
+            def build_kwargs(self, model, messages, tools=None, **params):
+                return {}
+
+            def normalize_response(self, response, **kw):
+                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")
+
+        register_transport("dummy_test", DummyTransport)
+        t = get_transport("dummy_test")
+        assert t.api_mode == "dummy_test"
+        _REGISTRY.pop("dummy_test", None)
+
+
+class TestAnthropicTransport:  # behaviour of the transport registered under "anthropic_messages"
+    @pytest.fixture
+    def transport(self):  # import the anthropic transport module, then fetch the transport from the registry
+        import agent.transports.anthropic  # noqa: F401
+
+        return get_transport("anthropic_messages")
+
+    def test_api_mode(self, transport):  # api_mode matches the key used for the lookup
+        assert transport.api_mode == "anthropic_messages"
+
+    def test_convert_tools_simple(self, transport):  # one function-style tool in, one converted entry out
+        tools = [{
+            "type": "function",
+            "function": {
+                "name": "test_tool",
+                "description": "A test",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }]
+        result = transport.convert_tools(tools)
+        assert len(result) == 1
+        assert result[0]["name"] == "test_tool"
+        assert "input_schema" in result[0]  # only presence of the key is checked, not its value
+
+    def test_validate_response_none(self, transport):  # None is rejected
+        assert transport.validate_response(None) is False
+
+    def test_validate_response_empty_content(self, transport):  # an empty content list is rejected
+        r = SimpleNamespace(content=[])
+        assert transport.validate_response(r) is False
+
+    def test_validate_response_valid(self, transport):  # a single text block is accepted
+        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
+        assert transport.validate_response(r) is True
+
+    def test_map_finish_reason(self, transport):  # expected stop_reason -> finish_reason pairs
+        assert transport.map_finish_reason("end_turn") == "stop"
+        assert transport.map_finish_reason("tool_use") == "tool_calls"
+        assert transport.map_finish_reason("max_tokens") == "length"
+        assert transport.map_finish_reason("stop_sequence") == "stop"
+        assert transport.map_finish_reason("refusal") == "content_filter"
+        assert transport.map_finish_reason("model_context_window_exceeded") == "length"
+        assert transport.map_finish_reason("unknown") == "stop"  # unrecognised reasons map to "stop"
+
+    def test_extract_cache_stats_none_usage(self, transport):  # usage=None yields None
+        r = SimpleNamespace(usage=None)
+        assert transport.extract_cache_stats(r) is None
+
+    def test_extract_cache_stats_with_cache(self, transport):  # read/creation counters are renamed in the result
+        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
+        r = SimpleNamespace(usage=usage)
+        result = transport.extract_cache_stats(r)
+        assert result == {"cached_tokens": 100, "creation_tokens": 50}
+
+    def test_extract_cache_stats_zero(self, transport):  # both counters zero yields None
+        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
+        r = SimpleNamespace(usage=usage)
+        assert transport.extract_cache_stats(r) is None
+
+    def test_normalize_response_text(self, transport):  # text block becomes content; "end_turn" becomes "stop"
+        r = SimpleNamespace(
+            content=[SimpleNamespace(type="text", text="Hello world")],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert isinstance(nr, NormalizedResponse)
+        assert nr.content == "Hello world"
+        assert nr.tool_calls is None or nr.tool_calls == []  # either "no tool calls" representation is accepted
+        assert nr.finish_reason == "stop"
+
+    def test_normalize_response_tool_calls(self, transport):  # tool_use block becomes exactly one tool call
+        r = SimpleNamespace(
+            content=[
+                SimpleNamespace(type="tool_use", id="toolu_123", name="terminal", input={"command": "ls"}),
+            ],
+            stop_reason="tool_use",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert nr.finish_reason == "tool_calls"
+        assert len(nr.tool_calls) == 1
+        tc = nr.tool_calls[0]
+        assert tc.name == "terminal"
+        assert tc.id == "toolu_123"
+        assert '"command"' in tc.arguments  # substring check: arguments contain the quoted key
+
+    def test_normalize_response_thinking(self, transport):  # thinking block fills reasoning, text block fills content
+        r = SimpleNamespace(
+            content=[
+                SimpleNamespace(type="thinking", thinking="Let me think..."),
+                SimpleNamespace(type="text", text="The answer is 42"),
+            ],
+            stop_reason="end_turn",
+            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
+            model="claude-sonnet-4-6",
+        )
+        nr = transport.normalize_response(r)
+        assert nr.content == "The answer is 42"
+        assert nr.reasoning == "Let me think..."
+
+    def test_build_kwargs_returns_dict(self, transport):  # result is a dict with model, max_tokens and messages keys
+        messages = [{"role": "user", "content": "Hello"}]
+        kw = transport.build_kwargs(
+            model="claude-sonnet-4-6",
+            messages=messages,
+            max_tokens=1024,
+        )
+        assert isinstance(kw, dict)
+        assert "model" in kw
+        assert "max_tokens" in kw
+        assert "messages" in kw
+
+    def test_convert_messages_extracts_system(self, transport):  # result unpacks into a (system, messages) pair
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": "Hi"},
+        ]
+        system, msgs = transport.convert_messages(messages)
+        assert system is not None  # only checks that something was extracted, not its exact form
+        assert len(msgs) >= 1
--- a/tests/agent/transports/test_types.py
+++ b/tests/agent/transports/test_types.py
@@ -0,0 +1,130 @@
+"""Tests for agent/transports/types.py — dataclass construction + helpers."""
+
+import json
+
+from agent.transports.types import (
+    NormalizedResponse,
+    ToolCall,
+    Usage,
+    build_tool_call,
+    map_finish_reason,
+)
+
+
+class TestToolCall:  # direct construction of ToolCall
+    def test_basic_construction(self):  # fields round-trip; provider_data is None when not supplied
+        tc = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
+        assert tc.id == "call_abc"
+        assert tc.name == "terminal"
+        assert tc.arguments == '{"cmd": "ls"}'
+        assert tc.provider_data is None
+
+    def test_none_id(self):  # id=None is accepted and preserved
+        tc = ToolCall(id=None, name="read_file", arguments="{}")
+        assert tc.id is None
+
+    def test_provider_data(self):  # entries of a supplied provider_data dict are readable by key
+        tc = ToolCall(
+            id="call_x",
+            name="t",
+            arguments="{}",
+            provider_data={"call_id": "call_x", "response_item_id": "fc_x"},
+        )
+        assert tc.provider_data["call_id"] == "call_x"
+        assert tc.provider_data["response_item_id"] == "fc_x"
+
+
+class TestUsage:  # construction of the Usage token-count container
+    def test_defaults(self):  # all four counters are 0 when no arguments are given
+        u = Usage()
+        assert u.prompt_tokens == 0
+        assert u.completion_tokens == 0
+        assert u.total_tokens == 0
+        assert u.cached_tokens == 0
+
+    def test_explicit(self):  # an explicitly passed total_tokens is stored as given
+        u = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80)
+        assert u.total_tokens == 150
+
+
+class TestNormalizedResponse:  # direct construction of NormalizedResponse
+    def test_text_only(self):  # reasoning, usage and provider_data are None when not supplied
+        r = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
+        assert r.content == "hello"
+        assert r.tool_calls is None
+        assert r.finish_reason == "stop"
+        assert r.reasoning is None
+        assert r.usage is None
+        assert r.provider_data is None
+
+    def test_with_tool_calls(self):  # a list of ToolCall objects is stored and indexable
+        tcs = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
+        r = NormalizedResponse(content=None, tool_calls=tcs, finish_reason="tool_calls")
+        assert r.finish_reason == "tool_calls"
+        assert len(r.tool_calls) == 1
+        assert r.tool_calls[0].name == "terminal"
+
+    def test_with_reasoning(self):  # the reasoning keyword is stored as given
+        r = NormalizedResponse(
+            content="answer",
+            tool_calls=None,
+            finish_reason="stop",
+            reasoning="I thought about it",
+        )
+        assert r.reasoning == "I thought about it"
+
+    def test_with_provider_data(self):  # nested provider_data content is readable after construction
+        r = NormalizedResponse(
+            content=None,
+            tool_calls=None,
+            finish_reason="stop",
+            provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]},
+        )
+        assert r.provider_data["reasoning_details"][0]["type"] == "thinking"
+
+
+class TestBuildToolCall:  # the build_tool_call helper
+    def test_dict_arguments_serialized(self):  # dict arguments come back equal to json.dumps of that dict
+        tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
+        assert tc.arguments == json.dumps({"cmd": "ls"})
+        assert tc.provider_data is None  # no extra keyword arguments were passed
+
+    def test_string_arguments_passthrough(self):  # string arguments are returned unchanged
+        tc = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
+        assert tc.arguments == '{"path": "/tmp"}'
+
+    def test_provider_fields(self):  # call_id / response_item_id keywords end up in provider_data
+        tc = build_tool_call(
+            id="call_3",
+            name="terminal",
+            arguments="{}",
+            call_id="call_3",
+            response_item_id="fc_3",
+        )
+        assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}
+
+    def test_none_id(self):  # id=None is accepted and preserved
+        tc = build_tool_call(id=None, name="t", arguments="{}")
+        assert tc.id is None
+
+
+class TestMapFinishReason:  # the module-level map_finish_reason(reason, mapping) helper
+    ANTHROPIC_MAP = {  # sample mapping defined for these tests and passed as the second argument below
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+    }
+
+    def test_known_reason(self):  # keys present in the mapping return their mapped value
+        assert map_finish_reason("end_turn", self.ANTHROPIC_MAP) == "stop"
+        assert map_finish_reason("tool_use", self.ANTHROPIC_MAP) == "tool_calls"
+        assert map_finish_reason("max_tokens", self.ANTHROPIC_MAP) == "length"
+        assert map_finish_reason("refusal", self.ANTHROPIC_MAP) == "content_filter"
+
+    def test_unknown_reason_defaults_to_stop(self):  # a key absent from the mapping yields "stop"
+        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"
+
+    def test_none_reason(self):  # None as the reason also yields "stop"
+        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
--- a/tests/skills/test_maps_client.py
+++ b/tests/skills/test_maps_client.py
@@ -1,135 +0,0 @@
-"""Regression tests for the bundled maps skill."""
-
-from __future__ import annotations
-
-import importlib.util
-from pathlib import Path
-from types import SimpleNamespace
-
-SCRIPT_PATH = (
-    Path(__file__).resolve().parents[2]
-    / "skills/productivity/maps/scripts/maps_client.py"
-)
-SKILL_PATH = (
-    Path(__file__).resolve().parents[2]
-    / "skills/productivity/maps/SKILL.md"
-)
-
-
-def load_module():
-    assert SCRIPT_PATH.exists(), f"missing maps client script: {SCRIPT_PATH}"
-    spec = importlib.util.spec_from_file_location("maps_client_test", SCRIPT_PATH)
-    module = importlib.util.module_from_spec(spec)
-    assert spec.loader is not None
-    spec.loader.exec_module(module)
-    return module
-
-
-def test_maps_skill_files_exist():
-    assert SCRIPT_PATH.exists()
-    assert SKILL_PATH.exists()
-
-
-def test_category_tags_cover_guest_house_camp_site_and_dual_key_bakery():
-    module = load_module()
-
-    assert module.CATEGORY_TAGS["guest_house"] == ("tourism", "guest_house")
-    assert module.CATEGORY_TAGS["camp_site"] == ("tourism", "camp_site")
-    assert module.CATEGORY_TAGS["bakery"] == [
-        ("shop", "bakery"),
-        ("amenity", "bakery"),
-    ]
-    assert module._tags_for("bakery") == [
-        ("shop", "bakery"),
-        ("amenity", "bakery"),
-    ]
-
-
-def test_build_overpass_queries_include_all_supported_tags():
-    module = load_module()
-
-    bakery_query = module.build_overpass_nearby(
-        None,
-        None,
-        40.0,
-        -74.0,
-        500,
-        10,
-        tag_pairs=module._tags_for("bakery"),
-    )
-    assert 'node["shop"="bakery"]' in bakery_query
-    assert 'way["shop"="bakery"]' in bakery_query
-    assert 'node["amenity"="bakery"]' in bakery_query
-    assert 'way["amenity"="bakery"]' in bakery_query
-
-    guest_house_query = module.build_overpass_nearby(
-        None,
-        None,
-        40.0,
-        -74.0,
-        500,
-        10,
-        tag_pairs=module._tags_for("guest_house"),
-    )
-    assert 'node["tourism"="guest_house"]' in guest_house_query
-    assert 'way["tourism"="guest_house"]' in guest_house_query
-
-    camp_site_bbox = module.build_overpass_bbox(
-        None,
-        None,
-        39.0,
-        -75.0,
-        41.0,
-        -73.0,
-        10,
-        tag_pairs=module._tags_for("camp_site"),
-    )
-    assert 'node["tourism"="camp_site"]' in camp_site_bbox
-    assert 'way["tourism"="camp_site"]' in camp_site_bbox
-
-
-def test_cmd_nearby_dedupes_dual_tag_bakery_results(monkeypatch, capsys):
-    module = load_module()
-
-    duplicate_bakery = {
-        "elements": [
-            {
-                "type": "node",
-                "id": 101,
-                "lat": 40.0,
-                "lon": -74.0,
-                "tags": {"name": "Wild Flour", "shop": "bakery"},
-            },
-            {
-                "type": "node",
-                "id": 101,
-                "lat": 40.0,
-                "lon": -74.0,
-                "tags": {"name": "Wild Flour", "amenity": "bakery"},
-            },
-        ]
-    }
-
-    monkeypatch.setattr(module, "overpass_query", lambda query: duplicate_bakery)
-    args = SimpleNamespace(
-        lat="40.0",
-        lon="-74.0",
-        near=None,
-        category="bakery",
-        category_list=[],
-        radius=500,
-        limit=10,
-    )
-
-    module.cmd_nearby(args)
-    out = capsys.readouterr().out
-    assert '"count": 1' in out
-    assert '"Wild Flour"' in out
-
-
-def test_skill_doc_lists_new_categories_and_supersession():
-    text = SKILL_PATH.read_text(encoding="utf-8")
-    assert "guest_house" in text
-    assert "camp_site" in text
-    assert "bakery" in text
-    assert "supersedes: [find-nearby]" in text