feat: native Anthropic provider with Claude Code credential auto-discovery
Add Anthropic as a first-class inference provider, bypassing OpenRouter for direct API access. Uses the native Anthropic SDK with a full format adapter (same pattern as the codex_responses api_mode). ## Auth (three methods, priority order) 1. ANTHROPIC_API_KEY env var (regular API key, sk-ant-api-*) 2. ANTHROPIC_TOKEN / CLAUDE_CODE_OAUTH_TOKEN env var (setup-token, sk-ant-oat-*) 3. Auto-discovery from ~/.claude/.credentials.json (Claude Code subscription) - Reads Claude Code's OAuth credentials - Checks token expiry with 60s buffer - Setup tokens use Bearer auth + anthropic-beta: oauth-2025-04-20 header - Regular API keys use standard x-api-key header ## Changes by file ### New files - agent/anthropic_adapter.py — Client builder, message/tool/response format conversion, Claude Code credential reader, token resolver. Handles system prompt extraction, tool_use/tool_result blocks, thinking/reasoning, orphaned tool_use cleanup, cache_control. - tests/test_anthropic_adapter.py — 36 tests covering all adapter logic ### Modified files - pyproject.toml — Add anthropic>=0.39.0 dependency - hermes_cli/auth.py — Add 'anthropic' to PROVIDER_REGISTRY with three env vars, plus 'claude'/'claude-code' aliases - hermes_cli/models.py — Add model catalog, labels, aliases, provider order - hermes_cli/main.py — Add 'anthropic' to --provider CLI choices - hermes_cli/runtime_provider.py — Add Anthropic branch returning api_mode='anthropic_messages' (before generic api_key fallthrough) - hermes_cli/setup.py — Add Anthropic setup wizard with Claude Code credential auto-discovery, model selection, OpenRouter tools prompt - agent/auxiliary_client.py — Add claude-haiku-4-5 as aux model - agent/model_metadata.py — Add bare Claude model context lengths - run_agent.py — Add anthropic_messages api_mode: * Client init (Anthropic SDK instead of OpenAI) * API call dispatch (_anthropic_client.messages.create) * Response validation (content blocks) * finish_reason mapping (stop_reason -> finish_reason) * Token usage (input_tokens/output_tokens) * Response normalization (normalize_anthropic_response) * Client interrupt/rebuild * Prompt caching auto-enabled for native Anthropic - tests/test_run_agent.py — Update test_anthropic_base_url_accepted to expect native routing, add test_prompt_caching_native_anthropic
This commit is contained in:
175
run_agent.py
175
run_agent.py
@@ -296,13 +296,16 @@ class AIAgent:
|
||||
self.base_url = base_url or OPENROUTER_BASE_URL
|
||||
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
||||
self.provider = provider_name or "openrouter"
|
||||
if api_mode in {"chat_completions", "codex_responses"}:
|
||||
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
|
||||
self.api_mode = api_mode
|
||||
elif self.provider == "openai-codex":
|
||||
self.api_mode = "codex_responses"
|
||||
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
|
||||
self.api_mode = "codex_responses"
|
||||
self.provider = "openai-codex"
|
||||
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
|
||||
self.api_mode = "anthropic_messages"
|
||||
self.provider = "anthropic"
|
||||
else:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
@@ -343,7 +346,8 @@ class AIAgent:
|
||||
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
|
||||
is_openrouter = "openrouter" in self.base_url.lower()
|
||||
is_claude = "claude" in self.model.lower()
|
||||
self._use_prompt_caching = is_openrouter and is_claude
|
||||
is_native_anthropic = self.api_mode == "anthropic_messages"
|
||||
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
|
||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
||||
|
||||
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
|
||||
@@ -420,66 +424,84 @@ class AIAgent:
|
||||
]:
|
||||
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
||||
|
||||
# Initialize OpenAI client via centralized provider router.
|
||||
# Initialize LLM client via centralized provider router.
|
||||
# The router handles auth resolution, base URL, headers, and
|
||||
# Codex wrapping for all known providers.
|
||||
# Codex/Anthropic wrapping for all known providers.
|
||||
# raw_codex=True because the main agent needs direct responses.stream()
|
||||
# access for Codex Responses API streaming.
|
||||
if api_key and base_url:
|
||||
# Explicit credentials from CLI/gateway — construct directly.
|
||||
# The runtime provider resolver already handled auth for us.
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
effective_base = base_url
|
||||
if "openrouter" in effective_base.lower():
|
||||
client_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
}
|
||||
elif "api.kimi.com" in effective_base.lower():
|
||||
client_kwargs["default_headers"] = {
|
||||
"User-Agent": "KimiCLI/1.3",
|
||||
}
|
||||
self._anthropic_client = None
|
||||
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
effective_key = api_key or os.getenv("ANTHROPIC_API_KEY", "") or os.getenv("ANTHROPIC_TOKEN", "")
|
||||
if not effective_key:
|
||||
from agent.anthropic_adapter import resolve_anthropic_token
|
||||
effective_key = resolve_anthropic_token() or ""
|
||||
self._anthropic_api_key = effective_key
|
||||
self._anthropic_client = build_anthropic_client(effective_key, base_url if base_url and "anthropic" in base_url else None)
|
||||
# No OpenAI client needed for Anthropic mode
|
||||
self.client = None
|
||||
self._client_kwargs = {}
|
||||
if not self.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
|
||||
if effective_key and len(effective_key) > 12:
|
||||
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
||||
else:
|
||||
# No explicit creds — use the centralized provider router
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
_routed_client, _ = resolve_provider_client(
|
||||
self.provider or "auto", model=self.model, raw_codex=True)
|
||||
if _routed_client is not None:
|
||||
client_kwargs = {
|
||||
"api_key": _routed_client.api_key,
|
||||
"base_url": str(_routed_client.base_url),
|
||||
}
|
||||
# Preserve any default_headers the router set
|
||||
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
|
||||
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
|
||||
else:
|
||||
# Final fallback: try raw OpenRouter key
|
||||
client_kwargs = {
|
||||
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
|
||||
"base_url": OPENROUTER_BASE_URL,
|
||||
"default_headers": {
|
||||
if api_key and base_url:
|
||||
# Explicit credentials from CLI/gateway — construct directly.
|
||||
# The runtime provider resolver already handled auth for us.
|
||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||
effective_base = base_url
|
||||
if "openrouter" in effective_base.lower():
|
||||
client_kwargs["default_headers"] = {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
},
|
||||
}
|
||||
|
||||
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
||||
try:
|
||||
self.client = OpenAI(**client_kwargs)
|
||||
if not self.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with model: {self.model}")
|
||||
if base_url:
|
||||
print(f"🔗 Using custom base URL: {base_url}")
|
||||
# Always show API key info (masked) for debugging auth issues
|
||||
key_used = client_kwargs.get("api_key", "none")
|
||||
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
||||
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
||||
}
|
||||
elif "api.kimi.com" in effective_base.lower():
|
||||
client_kwargs["default_headers"] = {
|
||||
"User-Agent": "KimiCLI/1.3",
|
||||
}
|
||||
else:
|
||||
# No explicit creds — use the centralized provider router
|
||||
from agent.auxiliary_client import resolve_provider_client
|
||||
_routed_client, _ = resolve_provider_client(
|
||||
self.provider or "auto", model=self.model, raw_codex=True)
|
||||
if _routed_client is not None:
|
||||
client_kwargs = {
|
||||
"api_key": _routed_client.api_key,
|
||||
"base_url": str(_routed_client.base_url),
|
||||
}
|
||||
# Preserve any default_headers the router set
|
||||
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
|
||||
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
|
||||
else:
|
||||
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
||||
# Final fallback: try raw OpenRouter key
|
||||
client_kwargs = {
|
||||
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
|
||||
"base_url": OPENROUTER_BASE_URL,
|
||||
"default_headers": {
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
},
|
||||
}
|
||||
|
||||
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
||||
try:
|
||||
self.client = OpenAI(**client_kwargs)
|
||||
if not self.quiet_mode:
|
||||
print(f"🤖 AI Agent initialized with model: {self.model}")
|
||||
if base_url:
|
||||
print(f"🔗 Using custom base URL: {base_url}")
|
||||
# Always show API key info (masked) for debugging auth issues
|
||||
key_used = client_kwargs.get("api_key", "none")
|
||||
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
||||
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
||||
else:
|
||||
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
||||
|
||||
# Provider fallback — a single backup model/provider tried when the
|
||||
# primary is exhausted (rate-limit, overload, connection failure).
|
||||
@@ -533,7 +555,8 @@ class AIAgent:
|
||||
|
||||
# Show prompt caching status
|
||||
if self._use_prompt_caching and not self.quiet_mode:
|
||||
print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
|
||||
source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
|
||||
print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
|
||||
|
||||
# Session logging setup - auto-save conversation trajectories for debugging
|
||||
self.session_start = datetime.now()
|
||||
@@ -2233,6 +2256,8 @@ class AIAgent:
|
||||
try:
|
||||
if self.api_mode == "codex_responses":
|
||||
result["response"] = self._run_codex_stream(api_kwargs)
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
result["response"] = self._anthropic_client.messages.create(**api_kwargs)
|
||||
else:
|
||||
result["response"] = self.client.chat.completions.create(**api_kwargs)
|
||||
except Exception as e:
|
||||
@@ -2245,12 +2270,19 @@ class AIAgent:
|
||||
if self._interrupt_requested:
|
||||
# Force-close the HTTP connection to stop token generation
|
||||
try:
|
||||
self.client.close()
|
||||
if self.api_mode == "anthropic_messages":
|
||||
self._anthropic_client.close()
|
||||
else:
|
||||
self.client.close()
|
||||
except Exception:
|
||||
pass
|
||||
# Rebuild the client for future calls (cheap, no network)
|
||||
try:
|
||||
self.client = OpenAI(**self._client_kwargs)
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_client
|
||||
self._anthropic_client = build_anthropic_client(self._anthropic_api_key)
|
||||
else:
|
||||
self.client = OpenAI(**self._client_kwargs)
|
||||
except Exception:
|
||||
pass
|
||||
raise InterruptedError("Agent interrupted during API call")
|
||||
@@ -2336,6 +2368,16 @@ class AIAgent:
|
||||
|
||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||
"""Build the keyword arguments dict for the active API mode."""
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
return build_anthropic_kwargs(
|
||||
model=self.model,
|
||||
messages=api_messages,
|
||||
tools=self.tools,
|
||||
max_tokens=None,
|
||||
reasoning_config=self.reasoning_config,
|
||||
)
|
||||
|
||||
if self.api_mode == "codex_responses":
|
||||
instructions = ""
|
||||
payload_messages = api_messages
|
||||
@@ -3561,6 +3603,17 @@ class AIAgent:
|
||||
elif len(output_items) == 0:
|
||||
response_invalid = True
|
||||
error_details.append("response.output is empty")
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
content_blocks = getattr(response, "content", None) if response is not None else None
|
||||
if response is None:
|
||||
response_invalid = True
|
||||
error_details.append("response is None")
|
||||
elif not isinstance(content_blocks, list):
|
||||
response_invalid = True
|
||||
error_details.append("response.content is not a list")
|
||||
elif len(content_blocks) == 0:
|
||||
response_invalid = True
|
||||
error_details.append("response.content is empty")
|
||||
else:
|
||||
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
|
||||
response_invalid = True
|
||||
@@ -3662,6 +3715,9 @@ class AIAgent:
|
||||
finish_reason = "length"
|
||||
else:
|
||||
finish_reason = "stop"
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
|
||||
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||
else:
|
||||
finish_reason = response.choices[0].finish_reason
|
||||
|
||||
@@ -3739,7 +3795,7 @@ class AIAgent:
|
||||
|
||||
# Track actual token usage from response for context management
|
||||
if hasattr(response, 'usage') and response.usage:
|
||||
if self.api_mode == "codex_responses":
|
||||
if self.api_mode in ("codex_responses", "anthropic_messages"):
|
||||
prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
|
||||
completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
|
||||
total_tokens = (
|
||||
@@ -4068,6 +4124,9 @@ class AIAgent:
|
||||
try:
|
||||
if self.api_mode == "codex_responses":
|
||||
assistant_message, finish_reason = self._normalize_codex_response(response)
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import normalize_anthropic_response
|
||||
assistant_message, finish_reason = normalize_anthropic_response(response)
|
||||
else:
|
||||
assistant_message = response.choices[0].message
|
||||
|
||||
|
||||
Reference in New Issue
Block a user