feat: native Anthropic provider with Claude Code credential auto-discovery

Add Anthropic as a first-class inference provider, bypassing OpenRouter
for direct API access. Uses the native Anthropic SDK with a full format
adapter (same pattern as the codex_responses api_mode).

## Auth (three methods, priority order)
1. ANTHROPIC_API_KEY env var (regular API key, sk-ant-api-*)
2. ANTHROPIC_TOKEN / CLAUDE_CODE_OAUTH_TOKEN env var (setup-token, sk-ant-oat-*)
3. Auto-discovery from ~/.claude/.credentials.json (Claude Code subscription)
   - Reads Claude Code's OAuth credentials
   - Checks token expiry with 60s buffer
   - Setup tokens use Bearer auth + anthropic-beta: oauth-2025-04-20 header
   - Regular API keys use standard x-api-key header

## Changes by file

### New files
- agent/anthropic_adapter.py — Client builder, message/tool/response
  format conversion, Claude Code credential reader, token resolver.
  Handles system prompt extraction, tool_use/tool_result blocks,
  thinking/reasoning, orphaned tool_use cleanup, cache_control.
- tests/test_anthropic_adapter.py — 36 tests covering all adapter logic

### Modified files
- pyproject.toml — Add anthropic>=0.39.0 dependency
- hermes_cli/auth.py — Add 'anthropic' to PROVIDER_REGISTRY with
  three env vars, plus 'claude'/'claude-code' aliases
- hermes_cli/models.py — Add model catalog, labels, aliases, provider order
- hermes_cli/main.py — Add 'anthropic' to --provider CLI choices
- hermes_cli/runtime_provider.py — Add Anthropic branch returning
  api_mode='anthropic_messages' (before generic api_key fallthrough)
- hermes_cli/setup.py — Add Anthropic setup wizard with Claude Code
  credential auto-discovery, model selection, OpenRouter tools prompt
- agent/auxiliary_client.py — Add claude-haiku-4-5 as aux model
- agent/model_metadata.py — Add bare Claude model context lengths
- run_agent.py — Add anthropic_messages api_mode:
  * Client init (Anthropic SDK instead of OpenAI)
  * API call dispatch (_anthropic_client.messages.create)
  * Response validation (content blocks)
  * finish_reason mapping (stop_reason -> finish_reason)
  * Token usage (input_tokens/output_tokens)
  * Response normalization (normalize_anthropic_response)
  * Client interrupt/rebuild
  * Prompt caching auto-enabled for native Anthropic
- tests/test_run_agent.py — Update test_anthropic_base_url_accepted to
  expect native routing, add test_prompt_caching_native_anthropic
This commit is contained in:
teknium1
2026-03-12 15:47:45 -07:00
parent 6b211bf008
commit 5e12442b4b
12 changed files with 1002 additions and 65 deletions

View File

@@ -296,13 +296,16 @@ class AIAgent:
self.base_url = base_url or OPENROUTER_BASE_URL
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
self.provider = provider_name or "openrouter"
if api_mode in {"chat_completions", "codex_responses"}:
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
self.api_mode = api_mode
elif self.provider == "openai-codex":
self.api_mode = "codex_responses"
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
self.api_mode = "codex_responses"
self.provider = "openai-codex"
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
self.api_mode = "anthropic_messages"
self.provider = "anthropic"
else:
self.api_mode = "chat_completions"
@@ -343,7 +346,8 @@ class AIAgent:
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
is_openrouter = "openrouter" in self.base_url.lower()
is_claude = "claude" in self.model.lower()
self._use_prompt_caching = is_openrouter and is_claude
is_native_anthropic = self.api_mode == "anthropic_messages"
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
@@ -420,66 +424,84 @@ class AIAgent:
]:
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
# Initialize OpenAI client via centralized provider router.
# Initialize LLM client via centralized provider router.
# The router handles auth resolution, base URL, headers, and
# Codex wrapping for all known providers.
# Codex/Anthropic wrapping for all known providers.
# raw_codex=True because the main agent needs direct responses.stream()
# access for Codex Responses API streaming.
if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
client_kwargs = {"api_key": api_key, "base_url": base_url}
effective_base = base_url
if "openrouter" in effective_base.lower():
client_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
elif "api.kimi.com" in effective_base.lower():
client_kwargs["default_headers"] = {
"User-Agent": "KimiCLI/1.3",
}
self._anthropic_client = None
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client
effective_key = api_key or os.getenv("ANTHROPIC_API_KEY", "") or os.getenv("ANTHROPIC_TOKEN", "")
if not effective_key:
from agent.anthropic_adapter import resolve_anthropic_token
effective_key = resolve_anthropic_token() or ""
self._anthropic_api_key = effective_key
self._anthropic_client = build_anthropic_client(effective_key, base_url if base_url and "anthropic" in base_url else None)
# No OpenAI client needed for Anthropic mode
self.client = None
self._client_kwargs = {}
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
if effective_key and len(effective_key) > 12:
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
else:
# No explicit creds — use the centralized provider router
from agent.auxiliary_client import resolve_provider_client
_routed_client, _ = resolve_provider_client(
self.provider or "auto", model=self.model, raw_codex=True)
if _routed_client is not None:
client_kwargs = {
"api_key": _routed_client.api_key,
"base_url": str(_routed_client.base_url),
}
# Preserve any default_headers the router set
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
else:
# Final fallback: try raw OpenRouter key
client_kwargs = {
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
"base_url": OPENROUTER_BASE_URL,
"default_headers": {
if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
client_kwargs = {"api_key": api_key, "base_url": base_url}
effective_base = base_url
if "openrouter" in effective_base.lower():
client_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
}
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
try:
self.client = OpenAI(**client_kwargs)
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model}")
if base_url:
print(f"🔗 Using custom base URL: {base_url}")
# Always show API key info (masked) for debugging auth issues
key_used = client_kwargs.get("api_key", "none")
if key_used and key_used != "dummy-key" and len(key_used) > 12:
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
}
elif "api.kimi.com" in effective_base.lower():
client_kwargs["default_headers"] = {
"User-Agent": "KimiCLI/1.3",
}
else:
# No explicit creds — use the centralized provider router
from agent.auxiliary_client import resolve_provider_client
_routed_client, _ = resolve_provider_client(
self.provider or "auto", model=self.model, raw_codex=True)
if _routed_client is not None:
client_kwargs = {
"api_key": _routed_client.api_key,
"base_url": str(_routed_client.base_url),
}
# Preserve any default_headers the router set
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
else:
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
except Exception as e:
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
# Final fallback: try raw OpenRouter key
client_kwargs = {
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
"base_url": OPENROUTER_BASE_URL,
"default_headers": {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
}
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
try:
self.client = OpenAI(**client_kwargs)
if not self.quiet_mode:
print(f"🤖 AI Agent initialized with model: {self.model}")
if base_url:
print(f"🔗 Using custom base URL: {base_url}")
# Always show API key info (masked) for debugging auth issues
key_used = client_kwargs.get("api_key", "none")
if key_used and key_used != "dummy-key" and len(key_used) > 12:
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
else:
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
except Exception as e:
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
# Provider fallback — a single backup model/provider tried when the
# primary is exhausted (rate-limit, overload, connection failure).
@@ -533,7 +555,8 @@ class AIAgent:
# Show prompt caching status
if self._use_prompt_caching and not self.quiet_mode:
print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
# Session logging setup - auto-save conversation trajectories for debugging
self.session_start = datetime.now()
@@ -2233,6 +2256,8 @@ class AIAgent:
try:
if self.api_mode == "codex_responses":
result["response"] = self._run_codex_stream(api_kwargs)
elif self.api_mode == "anthropic_messages":
result["response"] = self._anthropic_client.messages.create(**api_kwargs)
else:
result["response"] = self.client.chat.completions.create(**api_kwargs)
except Exception as e:
@@ -2245,12 +2270,19 @@ class AIAgent:
if self._interrupt_requested:
# Force-close the HTTP connection to stop token generation
try:
self.client.close()
if self.api_mode == "anthropic_messages":
self._anthropic_client.close()
else:
self.client.close()
except Exception:
pass
# Rebuild the client for future calls (cheap, no network)
try:
self.client = OpenAI(**self._client_kwargs)
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_client
self._anthropic_client = build_anthropic_client(self._anthropic_api_key)
else:
self.client = OpenAI(**self._client_kwargs)
except Exception:
pass
raise InterruptedError("Agent interrupted during API call")
@@ -2336,6 +2368,16 @@ class AIAgent:
def _build_api_kwargs(self, api_messages: list) -> dict:
"""Build the keyword arguments dict for the active API mode."""
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=self.model,
messages=api_messages,
tools=self.tools,
max_tokens=None,
reasoning_config=self.reasoning_config,
)
if self.api_mode == "codex_responses":
instructions = ""
payload_messages = api_messages
@@ -3561,6 +3603,17 @@ class AIAgent:
elif len(output_items) == 0:
response_invalid = True
error_details.append("response.output is empty")
elif self.api_mode == "anthropic_messages":
content_blocks = getattr(response, "content", None) if response is not None else None
if response is None:
response_invalid = True
error_details.append("response is None")
elif not isinstance(content_blocks, list):
response_invalid = True
error_details.append("response.content is not a list")
elif len(content_blocks) == 0:
response_invalid = True
error_details.append("response.content is empty")
else:
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
response_invalid = True
@@ -3662,6 +3715,9 @@ class AIAgent:
finish_reason = "length"
else:
finish_reason = "stop"
elif self.api_mode == "anthropic_messages":
stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
else:
finish_reason = response.choices[0].finish_reason
@@ -3739,7 +3795,7 @@ class AIAgent:
# Track actual token usage from response for context management
if hasattr(response, 'usage') and response.usage:
if self.api_mode == "codex_responses":
if self.api_mode in ("codex_responses", "anthropic_messages"):
prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
total_tokens = (
@@ -4068,6 +4124,9 @@ class AIAgent:
try:
if self.api_mode == "codex_responses":
assistant_message, finish_reason = self._normalize_codex_response(response)
elif self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import normalize_anthropic_response
assistant_message, finish_reason = normalize_anthropic_response(response)
else:
assistant_message = response.choices[0].message