Compare commits

..

2 Commits

Author SHA1 Message Date
Alexander Whitestone
8141bf8ba3 feat: verify AI Gateway provider UX and attribution headers (#950)
All checks were successful
Lint / lint (pull_request) Successful in 9s
Closes #950

- promote Vercel AI Gateway near the top of the provider picker
- add dedicated AI Gateway model flow with Vercel API-key deep link and live pricing
- use curated AI Gateway catalog refresh with free Moonshot auto-promotion
- apply AI Gateway attribution headers on runtime clients
- add targeted QA tests for provider UX and attribution headers
2026-04-22 11:40:49 -04:00
Alexander Whitestone
892c4ab70a wip: add failing AI Gateway QA tests (#950)
- add ai-gateway provider UX, pricing, and Moonshot promotion tests
- add attribution-header regression tests for run_agent base-url handling
2026-04-22 11:30:05 -04:00
8 changed files with 604 additions and 533 deletions

View File

@@ -1,70 +1,43 @@
from __future__ import annotations
"""
A2A agent card generation for fleet discovery.
Agent Card — A2A-compliant agent discovery.
Part of #843: fix: implement A2A agent card for fleet discovery (#819)
Refs #801.
Closes #802.
Provides metadata about the agent's identity, capabilities, and installed skills
for discovery by other agents in the fleet.
"""
import argparse
import json
import logging
import os
import socket
import sys
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, Iterable, List, Mapping, Sequence
from urllib.parse import urlparse, urlunparse
from pathlib import Path
from typing import Any, Dict, List, Optional
from hermes_cli import __version__
from hermes_cli.config import load_config
from hermes_cli.config import load_config, get_hermes_home
from agent.skill_utils import (
get_all_skills_dirs,
get_disabled_skill_names,
iter_skill_index_files,
parse_frontmatter,
skill_matches_platform,
get_all_skills_dirs,
get_disabled_skill_names,
skill_matches_platform
)
logger = logging.getLogger(__name__)
DEFAULT_DESCRIPTION = "Sovereign AI agent — orchestration, code, research"
DEFAULT_INPUT_MODES = ["text/plain", "application/json"]
DEFAULT_OUTPUT_MODES = ["text/plain", "application/json"]
_REQUIRED_CAPABILITY_FLAGS = (
"streaming",
"pushNotifications",
"stateTransitionHistory",
)
@dataclass
class AgentSkill:
id: str
name: str
description: str = ""
tags: List[str] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
data: Dict[str, Any] = {"id": self.id, "name": self.name}
if self.description:
data["description"] = self.description
if self.tags:
data["tags"] = self.tags
return data
version: str = "1.0.0"
@dataclass
class AgentCapabilities:
streaming: bool = True
pushNotifications: bool = False
stateTransitionHistory: bool = True
def to_dict(self) -> Dict[str, Any]:
return asdict(self)
tools: bool = True
vision: bool = False
reasoning: bool = False
@dataclass
class AgentCard:
@@ -74,81 +47,14 @@ class AgentCard:
version: str = __version__
capabilities: AgentCapabilities = field(default_factory=AgentCapabilities)
skills: List[AgentSkill] = field(default_factory=list)
defaultInputModes: List[str] = field(default_factory=lambda: list(DEFAULT_INPUT_MODES))
defaultOutputModes: List[str] = field(default_factory=lambda: list(DEFAULT_OUTPUT_MODES))
metadata: Dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> Dict[str, Any]:
data: Dict[str, Any] = {
"name": self.name,
"description": self.description,
"url": self.url,
"version": self.version,
"capabilities": self.capabilities.to_dict(),
"skills": [skill.to_dict() for skill in self.skills],
"defaultInputModes": list(self.defaultInputModes),
"defaultOutputModes": list(self.defaultOutputModes),
}
if self.metadata:
data["metadata"] = dict(self.metadata)
return data
def to_json(self, indent: int = 2) -> str:
return json.dumps(self.to_dict(), indent=indent)
def _env_or_empty(key: str) -> str:
return os.environ.get(key, "").strip()
def _as_agent_config(config: Mapping[str, Any] | None) -> Dict[str, Any]:
if not isinstance(config, Mapping):
return {}
agent_cfg = config.get("agent")
return dict(agent_cfg) if isinstance(agent_cfg, Mapping) else {}
def _as_a2a_config(config: Mapping[str, Any] | None) -> Dict[str, Any]:
if not isinstance(config, Mapping):
return {}
a2a_cfg = config.get("a2a")
return dict(a2a_cfg) if isinstance(a2a_cfg, Mapping) else {}
def _normalize_string_list(value: Any) -> List[str]:
if value is None:
return []
if isinstance(value, str):
parts = value.split(",")
elif isinstance(value, Sequence) and not isinstance(value, (bytes, bytearray, str)):
parts = list(value)
else:
parts = [value]
out: List[str] = []
seen = set()
for item in parts:
text = str(item).strip()
if not text or text in seen:
continue
seen.add(text)
out.append(text)
return out
def _normalize_skill_tags(frontmatter: Mapping[str, Any]) -> List[str]:
tags = _normalize_string_list(frontmatter.get("tags"))
category = str(frontmatter.get("category") or "").strip()
if category and category not in tags:
tags.append(category)
return tags
defaultInputModes: List[str] = field(default_factory=lambda: ["text/plain"])
defaultOutputModes: List[str] = field(default_factory=lambda: ["text/plain"])
def _load_skills() -> List[AgentSkill]:
"""Scan enabled skills and return A2A skill metadata."""
skills: List[AgentSkill] = []
"""Scan all enabled skills and return metadata."""
skills = []
disabled = get_disabled_skill_names()
seen_ids = set()
for skills_dir in get_all_skills_dirs():
if not skills_dir.is_dir():
continue
@@ -159,262 +65,71 @@ def _load_skills() -> List[AgentSkill]:
except Exception:
continue
skill_name = frontmatter.get("name") or skill_file.parent.name
if str(skill_name) in disabled:
continue
if not skill_matches_platform(frontmatter):
continue
skill_id = str(frontmatter.get("name") or skill_file.parent.name).strip().lower().replace(" ", "-")
if skill_id in disabled or skill_id in seen_ids:
continue
seen_ids.add(skill_id)
skills.append(AgentSkill(
id=str(skill_name),
name=str(frontmatter.get("name", skill_name)),
description=str(frontmatter.get("description", "")),
version=str(frontmatter.get("version", "1.0.0"))
))
return skills
display_name = str(frontmatter.get("title") or frontmatter.get("name") or skill_file.parent.name).strip()
description = str(frontmatter.get("description") or "").strip()
tags = _normalize_skill_tags(frontmatter)
skills.append(
AgentSkill(
id=skill_id,
name=display_name,
description=description,
tags=tags,
)
)
def build_agent_card() -> AgentCard:
"""Build the agent card from current configuration and environment."""
config = load_config()
# Identity
name = os.environ.get("HERMES_AGENT_NAME") or config.get("agent", {}).get("name") or "hermes"
description = os.environ.get("HERMES_AGENT_DESCRIPTION") or config.get("agent", {}).get("description") or "Sovereign AI agent"
# URL - try to determine from environment or config
port = os.environ.get("HERMES_WEB_PORT") or "9119"
host = os.environ.get("HERMES_WEB_HOST") or "localhost"
url = f"http://{host}:{port}"
# Capabilities
# In a real scenario, we'd check model metadata for vision/reasoning
capabilities = AgentCapabilities(
streaming=True,
tools=True,
vision=False, # Default to false unless we can confirm
reasoning=False
)
# Skills
skills = _load_skills()
return AgentCard(
name=name,
description=description,
url=url,
version=__version__,
capabilities=capabilities,
skills=skills
)
return sorted(skills, key=lambda skill: skill.id)
def _get_agent_name(config: Mapping[str, Any] | None, override: str | None = None) -> str:
if override:
return override
env_name = _env_or_empty("HERMES_AGENT_NAME") or _env_or_empty("AGENT_NAME")
if env_name:
return env_name
agent_cfg = _as_agent_config(config)
if agent_cfg.get("name"):
return str(agent_cfg["name"]).strip()
def get_agent_card_json() -> str:
"""Return the agent card as a JSON string."""
try:
hostname = socket.gethostname().split(".", 1)[0].strip()
if hostname:
return hostname
except Exception:
pass
return "hermes"
def _get_description(config: Mapping[str, Any] | None, override: str | None = None) -> str:
if override:
return override
env_description = _env_or_empty("HERMES_AGENT_DESCRIPTION") or _env_or_empty("AGENT_DESCRIPTION")
if env_description:
return env_description
agent_cfg = _as_agent_config(config)
if agent_cfg.get("description"):
return str(agent_cfg["description"]).strip()
return DEFAULT_DESCRIPTION
def _normalize_a2a_url(url: str) -> str:
raw = (url or "").strip()
if not raw:
return ""
parsed = urlparse(raw if "://" in raw else f"https://{raw}")
scheme = parsed.scheme or "https"
netloc = parsed.netloc or parsed.path
path = parsed.path if parsed.netloc else ""
normalized_path = path.rstrip("/") if path not in ("", "/") else ""
if not normalized_path.endswith("/a2a"):
normalized_path = f"{normalized_path}/a2a" if normalized_path else "/a2a"
return urlunparse((scheme, netloc, normalized_path, "", "", ""))
def _get_agent_url(config: Mapping[str, Any] | None, override: str | None = None) -> str:
if override:
return _normalize_a2a_url(override)
agent_cfg = _as_agent_config(config)
a2a_cfg = _as_a2a_config(config)
explicit = (
_env_or_empty("HERMES_A2A_PUBLIC_URL")
or str(a2a_cfg.get("public_url") or "").strip()
or str(agent_cfg.get("a2a_public_url") or "").strip()
)
if explicit:
return _normalize_a2a_url(explicit)
host = (
_env_or_empty("HERMES_A2A_HOST")
or str(a2a_cfg.get("host") or "").strip()
or _env_or_empty("HERMES_WEB_HOST")
or str(agent_cfg.get("host") or "").strip()
or "localhost"
)
port = (
_env_or_empty("HERMES_A2A_PORT")
or str(a2a_cfg.get("port") or "").strip()
or _env_or_empty("HERMES_WEB_PORT")
or str(agent_cfg.get("port") or "").strip()
or "9119"
)
scheme = (
_env_or_empty("HERMES_A2A_SCHEME")
or str(a2a_cfg.get("scheme") or "").strip()
or ("https" if (_env_or_empty("HERMES_MTLS_CERT") or str(port) == "9443") else "http")
)
return _normalize_a2a_url(f"{scheme}://{host}:{port}")
def _merge_skills(base_skills: Iterable[AgentSkill], extra_skills: Iterable[AgentSkill] | None = None) -> List[AgentSkill]:
merged: Dict[str, AgentSkill] = {}
for skill in list(base_skills) + list(extra_skills or []):
if skill.id not in merged:
merged[skill.id] = skill
return [merged[key] for key in sorted(merged)]
def build_agent_card(
*,
name: str | None = None,
description: str | None = None,
url: str | None = None,
extra_skills: Iterable[AgentSkill] | None = None,
metadata: Mapping[str, Any] | None = None,
) -> AgentCard:
"""Build an A2A-compliant agent card from config, env, and installed skills."""
try:
config = load_config()
except Exception as exc:
logger.debug("Falling back to empty config while building agent card: %s", exc)
config = {}
card = AgentCard(
name=_get_agent_name(config, override=name),
description=_get_description(config, override=description),
url=_get_agent_url(config, override=url),
skills=_merge_skills(_load_skills(), extra_skills),
metadata=dict(metadata or {}),
)
return card
def validate_agent_card(card: AgentCard | Dict[str, Any]) -> List[str]:
"""Return a list of schema-validation errors for an agent card."""
data = card.to_dict() if isinstance(card, AgentCard) else dict(card)
errors: List[str] = []
for field_name in ("name", "description", "url", "version"):
value = data.get(field_name)
if not isinstance(value, str) or not value.strip():
errors.append(f"{field_name} must be a non-empty string")
url_value = str(data.get("url") or "")
parsed = urlparse(url_value)
if not parsed.scheme or not parsed.netloc:
errors.append("url must be an absolute http/https URL")
elif parsed.scheme not in {"http", "https"}:
errors.append("url must use http or https")
elif not parsed.path.rstrip("/").endswith("/a2a"):
errors.append("url must point to the /a2a endpoint")
capabilities = data.get("capabilities")
if not isinstance(capabilities, Mapping):
errors.append("capabilities must be an object")
else:
for capability_name in _REQUIRED_CAPABILITY_FLAGS:
if not isinstance(capabilities.get(capability_name), bool):
errors.append(f"capabilities.{capability_name} must be a boolean")
for field_name, required_modes in (
("defaultInputModes", DEFAULT_INPUT_MODES),
("defaultOutputModes", DEFAULT_OUTPUT_MODES),
):
modes = data.get(field_name)
if not isinstance(modes, list) or not modes:
errors.append(f"{field_name} must be a non-empty list of MIME types")
continue
for mode in modes:
if not isinstance(mode, str) or "/" not in mode:
errors.append(f"{field_name} entries must be MIME types")
for required_mode in required_modes:
if required_mode not in modes:
errors.append(f"{field_name} must include {required_mode}")
skills = data.get("skills")
if not isinstance(skills, list):
errors.append("skills must be a list")
else:
for index, skill in enumerate(skills):
if not isinstance(skill, Mapping):
errors.append(f"skills[{index}] must be an object")
continue
if not str(skill.get("id") or "").strip():
errors.append(f"skills[{index}] missing id")
if not str(skill.get("name") or "").strip():
errors.append(f"skills[{index}] missing name")
tags = skill.get("tags", [])
if tags is None:
tags = []
if not isinstance(tags, list):
errors.append(f"skills[{index}].tags must be a list")
else:
for tag in tags:
if not isinstance(tag, str) or not tag.strip():
errors.append(f"skills[{index}].tags entries must be non-empty strings")
metadata = data.get("metadata")
if metadata is not None and not isinstance(metadata, Mapping):
errors.append("metadata must be an object when present")
return errors
def get_agent_card_json(
*,
name: str | None = None,
description: str | None = None,
url: str | None = None,
metadata: Mapping[str, Any] | None = None,
indent: int = 2,
) -> str:
"""Return the local agent card as JSON, falling back to an error card on failure."""
try:
card = build_agent_card(name=name, description=description, url=url, metadata=metadata)
errors = validate_agent_card(card)
if errors:
raise ValueError("; ".join(errors))
return card.to_json(indent=indent)
except Exception as exc:
logger.error("Failed to build agent card: %s", exc)
card = build_agent_card()
return json.dumps(asdict(card), indent=2)
except Exception as e:
logger.error(f"Failed to build agent card: {e}")
# Minimal fallback card
fallback = {
"name": name or _env_or_empty("HERMES_AGENT_NAME") or "hermes",
"description": "Sovereign AI agent (agent card fallback)",
"url": url or "http://localhost:9119/a2a",
"name": "hermes",
"description": "Sovereign AI agent (fallback)",
"version": __version__,
"capabilities": AgentCapabilities().to_dict(),
"skills": [],
"defaultInputModes": list(DEFAULT_INPUT_MODES),
"defaultOutputModes": list(DEFAULT_OUTPUT_MODES),
"error": str(exc),
"error": str(e)
}
return json.dumps(fallback, indent=indent)
return json.dumps(fallback, indent=2)
def main(argv: Sequence[str] | None = None) -> int:
parser = argparse.ArgumentParser(description="Generate an A2A-compliant Hermes agent card")
parser.add_argument("--name", help="Override the agent name")
parser.add_argument("--description", help="Override the agent description")
parser.add_argument("--url", help="Override the public A2A URL")
parser.add_argument("--validate", action="store_true", help="Validate before printing; exit 1 on schema errors")
args = parser.parse_args(list(argv) if argv is not None else None)
card = build_agent_card(name=args.name, description=args.description, url=args.url)
errors = validate_agent_card(card)
if args.validate and errors:
for error in errors:
print(error, file=sys.stderr)
return 1
print(card.to_json(indent=2))
return 0
if __name__ == "__main__":
raise SystemExit(main())
def validate_agent_card(card_data: Dict[str, Any]) -> bool:
"""Check if the card data complies with the A2A schema."""
required = ["name", "description", "url", "version"]
return all(k in card_data for k in required)

View File

@@ -1,4 +1,4 @@
from agent.telemetry_logger import log_token_usage\n"""Shared auxiliary client router for side tasks.
"""Shared auxiliary client router for side tasks.
Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up
@@ -38,6 +38,7 @@ import json
import logging
import os
import threading
from agent.telemetry_logger import log_token_usage
import time
from pathlib import Path # noqa: F401 — used by test mocks
from types import SimpleNamespace
@@ -122,6 +123,16 @@ _OR_HEADERS = {
"X-OpenRouter-Categories": "productivity,cli-agent",
}
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway analytics.
from hermes_cli import __version__ as _HERMES_VERSION
_AI_GATEWAY_HEADERS = {
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
"X-Title": "Hermes Agent",
"User-Agent": f"HermesAgent/{_HERMES_VERSION}",
}
# Nous Portal extra_body for product attribution.
# Callers should pass this as extra_body in chat.completions.create()
# when the auxiliary client is backed by Nous Portal.
@@ -396,7 +407,8 @@ class _CodexCompletionsAdapter:
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
completion_tokens=getattr(resp_usage, "output_tokens", 0),
total_tokens=getattr(resp_usage, "total_tokens", 0),
)\n log_token_usage(usage.prompt_tokens, usage.completion_tokens, model)
)
log_token_usage(usage.prompt_tokens, usage.completion_tokens, model)
except Exception as exc:
logger.debug("Codex auxiliary Responses API call failed: %s", exc)
raise
@@ -529,7 +541,8 @@ class _AnthropicCompletionsAdapter:
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
)\n log_token_usage(usage.prompt_tokens, usage.completion_tokens, model)
)
log_token_usage(usage.prompt_tokens, usage.completion_tokens, model)
choice = SimpleNamespace(
index=0,

View File

@@ -168,7 +168,7 @@ import time as _time
from datetime import datetime
from hermes_cli import __version__, __release_date__
from hermes_constants import OPENROUTER_BASE_URL
from hermes_constants import AI_GATEWAY_BASE_URL, OPENROUTER_BASE_URL
logger = logging.getLogger(__name__)
@@ -1112,6 +1112,8 @@ def select_provider_and_model(args=None):
# Step 2: Provider-specific setup + model selection
if selected_provider == "openrouter":
_model_flow_openrouter(config, current_model)
elif selected_provider == "ai-gateway":
_model_flow_ai_gateway(config, current_model)
elif selected_provider == "nous":
_model_flow_nous(config, current_model, args=args)
elif selected_provider == "openai-codex":
@@ -1267,6 +1269,55 @@ def _model_flow_openrouter(config, current_model=""):
print("No change.")
def _model_flow_ai_gateway(config, current_model=""):
"""Vercel AI Gateway provider: ensure API key, then pick model with pricing."""
from hermes_cli.auth import _prompt_model_selection, _save_model_choice, deactivate_provider
from hermes_cli.config import get_env_value, save_env_value
from hermes_cli.models import ai_gateway_model_ids, get_pricing_for_provider
api_key = get_env_value("AI_GATEWAY_API_KEY")
if not api_key:
print("No Vercel AI Gateway API key configured.")
print("Create API key here: https://vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway")
print("Add a payment method to get $5 in free credits.")
print()
try:
import getpass
key = getpass.getpass("AI Gateway API key (or Enter to cancel): ").strip()
except (KeyboardInterrupt, EOFError):
print()
return
if not key:
print("Cancelled.")
return
save_env_value("AI_GATEWAY_API_KEY", key)
print("API key saved.")
print()
models_list = ai_gateway_model_ids(force_refresh=True)
pricing = get_pricing_for_provider("ai-gateway", force_refresh=True)
selected = _prompt_model_selection(models_list, current_model=current_model, pricing=pricing)
if selected:
_save_model_choice(selected)
from hermes_cli.config import load_config, save_config
cfg = load_config()
model = cfg.get("model")
if not isinstance(model, dict):
model = {"default": model} if model else {}
cfg["model"] = model
model["provider"] = "ai-gateway"
model["base_url"] = AI_GATEWAY_BASE_URL
model["api_mode"] = "chat_completions"
save_config(cfg)
deactivate_provider()
print(f"Default model set to: {selected} (via Vercel AI Gateway)")
else:
print("No change.")
def _model_flow_nous(config, current_model="", args=None):
"""Nous Portal provider: ensure logged in, then pick model."""
from hermes_cli.auth import (

View File

@@ -58,6 +58,28 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
_openrouter_catalog_cache: list[tuple[str, str]] | None = None
# Fallback Vercel AI Gateway snapshot used when the live catalog is unavailable.
# OSS / open-weight models prioritized first, then closed-source by family.
VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("alibaba/qwen3.6-plus", ""),
("zai/glm-5.1", ""),
("minimax/minimax-m2.7", ""),
("anthropic/claude-sonnet-4.6", ""),
("anthropic/claude-opus-4.7", ""),
("anthropic/claude-opus-4.6", ""),
("anthropic/claude-haiku-4.5", ""),
("openai/gpt-5.4", ""),
("openai/gpt-5.4-mini", ""),
("openai/gpt-5.3-codex", ""),
("google/gemini-3.1-pro-preview", ""),
("google/gemini-3-flash", ""),
("google/gemini-3.1-flash-lite-preview", ""),
("xai/grok-4.20-reasoning", ""),
]
_ai_gateway_catalog_cache: list[tuple[str, str]] | None = None
def _codex_curated_models() -> list[str]:
"""Derive the openai-codex curated list from codex_models.py.
@@ -258,18 +280,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"minimax-m2.5",
],
"ai-gateway": [
"anthropic/claude-opus-4.6",
"moonshotai/kimi-k2.6",
"alibaba/qwen3.6-plus",
"zai/glm-5.1",
"minimax/minimax-m2.7",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-opus-4.7",
"anthropic/claude-opus-4.6",
"anthropic/claude-haiku-4.5",
"openai/gpt-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"google/gemini-3-pro-preview",
"openai/gpt-5.4",
"openai/gpt-5.4-mini",
"openai/gpt-5.3-codex",
"google/gemini-3.1-pro-preview",
"google/gemini-3-flash",
"google/gemini-2.5-pro",
"google/gemini-2.5-flash",
"deepseek/deepseek-v3.2",
"google/gemini-3.1-flash-lite-preview",
"xai/grok-4.20-reasoning",
],
"kilocode": [
"anthropic/claude-opus-4.6",
@@ -516,6 +541,7 @@ class ProviderEntry(NamedTuple):
CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("nous", "Nous Portal", "Nous Portal (Nous Research subscription)"),
ProviderEntry("openrouter", "OpenRouter", "OpenRouter (100+ models, pay-per-use)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
ProviderEntry("anthropic", "Anthropic", "Anthropic (Claude models — API key or Claude Code)"),
ProviderEntry("openai-codex", "OpenAI Codex", "OpenAI Codex"),
ProviderEntry("xiaomi", "Xiaomi MiMo", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
@@ -536,7 +562,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("kilocode", "Kilo Code", "Kilo Code (Kilo Gateway API)"),
ProviderEntry("opencode-zen", "OpenCode Zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
ProviderEntry("opencode-go", "OpenCode Go", "OpenCode Go (open models, $10/month subscription)"),
ProviderEntry("ai-gateway", "Vercel AI Gateway", "Vercel AI Gateway (200+ models, pay-per-use)"),
]
# Derived dicts — used throughout the codebase
@@ -679,6 +704,90 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
def _ai_gateway_model_is_free(pricing: Any) -> bool:
"""Return True if an AI Gateway model has $0 input AND output pricing."""
if not isinstance(pricing, dict):
return False
try:
return float(pricing.get("input", "0")) == 0 and float(pricing.get("output", "0")) == 0
except (TypeError, ValueError):
return False
def fetch_ai_gateway_models(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> list[tuple[str, str]]:
"""Return the curated AI Gateway picker list, refreshed from the live catalog when possible."""
global _ai_gateway_catalog_cache
if _ai_gateway_catalog_cache is not None and not force_refresh:
return list(_ai_gateway_catalog_cache)
from hermes_constants import AI_GATEWAY_BASE_URL
fallback = list(VERCEL_AI_GATEWAY_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:
req = urllib.request.Request(
f"{AI_GATEWAY_BASE_URL.rstrip('/')}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
return list(_ai_gateway_catalog_cache or fallback)
live_items = payload.get("data", [])
if not isinstance(live_items, list):
return list(_ai_gateway_catalog_cache or fallback)
live_by_id: dict[str, dict[str, Any]] = {}
for item in live_items:
if not isinstance(item, dict):
continue
mid = str(item.get("id") or "").strip()
if not mid:
continue
live_by_id[mid] = item
curated: list[tuple[str, str]] = []
for preferred_id in preferred_ids:
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
desc = "free" if _ai_gateway_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
if not curated:
return list(_ai_gateway_catalog_cache or fallback)
free_moonshot = next(
(
mid
for mid, item in live_by_id.items()
if mid.startswith("moonshotai/") and _ai_gateway_model_is_free(item.get("pricing"))
),
None,
)
if free_moonshot:
curated = [(mid, desc) for mid, desc in curated if mid != free_moonshot]
curated.insert(0, (free_moonshot, "recommended"))
else:
first_id, _ = curated[0]
curated[0] = (first_id, "recommended")
_ai_gateway_catalog_cache = curated
return list(curated)
def ai_gateway_model_ids(*, force_refresh: bool = False) -> list[str]:
"""Return just the AI Gateway model-id strings."""
return [mid for mid, _ in fetch_ai_gateway_models(force_refresh=force_refresh)]
# ---------------------------------------------------------------------------
# Pricing helpers — fetch live pricing from OpenRouter-compatible /v1/models
# ---------------------------------------------------------------------------
@@ -821,6 +930,51 @@ def fetch_models_with_pricing(
return result
def fetch_ai_gateway_pricing(
timeout: float = 8.0,
*,
force_refresh: bool = False,
) -> dict[str, dict[str, str]]:
"""Fetch Vercel AI Gateway /v1/models and return Hermes-shaped pricing."""
from hermes_constants import AI_GATEWAY_BASE_URL
cache_key = AI_GATEWAY_BASE_URL.rstrip("/")
if not force_refresh and cache_key in _pricing_cache:
return _pricing_cache[cache_key]
try:
req = urllib.request.Request(
f"{cache_key}/models",
headers={"Accept": "application/json"},
)
with urllib.request.urlopen(req, timeout=timeout) as resp:
payload = json.loads(resp.read().decode())
except Exception:
_pricing_cache[cache_key] = {}
return {}
result: dict[str, dict[str, str]] = {}
for item in payload.get("data", []):
if not isinstance(item, dict):
continue
mid = item.get("id")
pricing = item.get("pricing")
if not (mid and isinstance(pricing, dict)):
continue
entry: dict[str, str] = {
"prompt": str(pricing.get("input", "")),
"completion": str(pricing.get("output", "")),
}
if pricing.get("input_cache_read"):
entry["input_cache_read"] = str(pricing["input_cache_read"])
if pricing.get("input_cache_write"):
entry["input_cache_write"] = str(pricing["input_cache_write"])
result[mid] = entry
_pricing_cache[cache_key] = result
return result
def _resolve_openrouter_api_key() -> str:
"""Best-effort OpenRouter API key for pricing fetch."""
return os.getenv("OPENROUTER_API_KEY", "").strip()
@@ -839,7 +993,7 @@ def _resolve_nous_pricing_credentials() -> tuple[str, str]:
def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> dict[str, dict[str, str]]:
"""Return live pricing for providers that support it (openrouter, nous)."""
"""Return live pricing for providers that support it (openrouter, ai-gateway, nous)."""
normalized = normalize_provider(provider)
if normalized == "openrouter":
return fetch_models_with_pricing(
@@ -847,11 +1001,11 @@ def get_pricing_for_provider(provider: str, *, force_refresh: bool = False) -> d
base_url="https://openrouter.ai/api",
force_refresh=force_refresh,
)
if normalized == "ai-gateway":
return fetch_ai_gateway_pricing(force_refresh=force_refresh)
if normalized == "nous":
api_key, base_url = _resolve_nous_pricing_credentials()
if base_url:
# Nous base_url typically looks like https://inference-api.nousresearch.com/v1
# We need the part before /v1 for our fetch function
stripped = base_url.rstrip("/")
if stripped.endswith("/v1"):
stripped = stripped[:-3]
@@ -1253,9 +1407,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
if live:
return live
if normalized == "ai-gateway":
live = _fetch_ai_gateway_models()
if live:
return live
return ai_gateway_model_ids()
if normalized == "custom":
base_url = _get_custom_base_url()
if base_url:

View File

@@ -908,6 +908,10 @@ class AIAgent:
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
elif "ai-gateway.vercel.sh" in effective_base.lower():
from agent.auxiliary_client import _AI_GATEWAY_HEADERS
client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
elif "api.githubcopilot.com" in effective_base.lower():
from hermes_cli.models import copilot_default_headers
@@ -4667,11 +4671,13 @@ class AIAgent:
return True
def _apply_client_headers_for_base_url(self, base_url: str) -> None:
from agent.auxiliary_client import _OR_HEADERS
from agent.auxiliary_client import _AI_GATEWAY_HEADERS, _OR_HEADERS
normalized = (base_url or "").lower()
if "openrouter" in normalized:
self._client_kwargs["default_headers"] = dict(_OR_HEADERS)
elif "ai-gateway.vercel.sh" in normalized:
self._client_kwargs["default_headers"] = dict(_AI_GATEWAY_HEADERS)
elif "api.githubcopilot.com" in normalized:
from hermes_cli.models import copilot_default_headers

View File

@@ -1,150 +0,0 @@
from __future__ import annotations
import json
from pathlib import Path
import pytest
from agent import agent_card as mod
DEFAULT_DESCRIPTION = "Sovereign AI agent — orchestration, code, research"
def _set_base_context(monkeypatch, *, name: str = "Timmy", description: str = DEFAULT_DESCRIPTION, url: str = "https://timmy.local:9443/a2a", skills=None):
monkeypatch.setattr(mod, "load_config", lambda: {"agent": {"name": name, "description": description}})
monkeypatch.setattr(
mod,
"_load_skills",
lambda: list(
skills
if skills is not None
else [
mod.AgentSkill(
id="code",
name="Code Implementation",
description="Implement and patch code",
tags=["python", "gitea"],
)
]
),
)
monkeypatch.setenv("HERMES_A2A_PUBLIC_URL", url)
monkeypatch.delenv("HERMES_AGENT_NAME", raising=False)
monkeypatch.delenv("AGENT_NAME", raising=False)
monkeypatch.delenv("HERMES_AGENT_DESCRIPTION", raising=False)
monkeypatch.delenv("AGENT_DESCRIPTION", raising=False)
def test_build_agent_card_matches_issue_802_schema(monkeypatch):
_set_base_context(monkeypatch)
card = mod.build_agent_card()
payload = card.to_dict()
assert payload["name"] == "Timmy"
assert payload["description"] == DEFAULT_DESCRIPTION
assert payload["url"] == "https://timmy.local:9443/a2a"
assert payload["capabilities"] == {
"streaming": True,
"pushNotifications": False,
"stateTransitionHistory": True,
}
assert payload["defaultInputModes"] == ["text/plain", "application/json"]
assert payload["defaultOutputModes"] == ["text/plain", "application/json"]
assert payload["skills"][0]["tags"] == ["python", "gitea"]
assert mod.validate_agent_card(payload) == []
@pytest.mark.parametrize(
("name", "url"),
[
("Timmy", "https://timmy.local:9443/a2a"),
("Allegro", "https://allegro.local:9443/a2a"),
("Ezra", "https://ezra.local:9443/a2a"),
],
)
def test_build_agent_card_supports_fleet_members(monkeypatch, name, url):
_set_base_context(monkeypatch, name=name, url=url, skills=[])
payload = mod.build_agent_card().to_dict()
assert payload["name"] == name
assert payload["url"] == url
assert mod.validate_agent_card(payload) == []
def test_load_skills_collects_tags_and_category(monkeypatch, tmp_path):
skill_root = tmp_path / "skills"
skill_dir = skill_root / "code-implementation"
skill_dir.mkdir(parents=True)
(skill_dir / "SKILL.md").write_text(
"""---
name: Code Implementation
description: Implement and patch code
tags: [python, gitea]
category: discovery
---
# Code Implementation
""",
encoding="utf-8",
)
monkeypatch.setattr(mod, "get_all_skills_dirs", lambda: [skill_root])
monkeypatch.setattr(mod, "get_disabled_skill_names", lambda: set())
monkeypatch.setattr(mod, "skill_matches_platform", lambda _frontmatter: True)
skills = mod._load_skills()
assert len(skills) == 1
assert skills[0].id == "code-implementation"
assert skills[0].name == "Code Implementation"
assert skills[0].description == "Implement and patch code"
assert skills[0].tags == ["python", "gitea", "discovery"]
def test_validate_agent_card_reports_schema_errors():
errors = mod.validate_agent_card(
{
"name": "",
"description": "",
"url": "timmy.local",
"version": "",
"capabilities": {"streaming": True},
"skills": [{"id": "", "name": "", "tags": "python"}],
"defaultInputModes": ["text/plain"],
"defaultOutputModes": ["plain"],
"metadata": [],
}
)
assert any("name must be a non-empty string" in error for error in errors)
assert any("url must be an absolute http/https URL" in error for error in errors)
assert any("capabilities.pushNotifications" in error for error in errors)
assert any("skills[0] missing id" in error for error in errors)
assert any("skills[0].tags must be a list" in error for error in errors)
assert any("defaultInputModes must include application/json" in error for error in errors)
assert any("defaultOutputModes entries must be MIME types" in error for error in errors)
assert any("metadata must be an object" in error for error in errors)
def test_get_agent_card_json_emits_valid_json(monkeypatch):
_set_base_context(monkeypatch)
payload = json.loads(mod.get_agent_card_json())
assert payload["name"] == "Timmy"
assert mod.validate_agent_card(payload) == []
def test_main_validate_prints_card(monkeypatch, capsys):
_set_base_context(monkeypatch)
exit_code = mod.main(["--validate"])
captured = capsys.readouterr()
assert exit_code == 0
payload = json.loads(captured.out)
assert payload["url"] == "https://timmy.local:9443/a2a"
assert captured.err == ""

View File

@@ -0,0 +1,222 @@
"""AI Gateway provider UX, live pricing, and model promotion tests."""
from __future__ import annotations
import json
from unittest.mock import MagicMock, patch
import pytest
from hermes_cli import models as models_module
from hermes_cli.models import (
CANONICAL_PROVIDERS,
VERCEL_AI_GATEWAY_MODELS,
_ai_gateway_model_is_free,
ai_gateway_model_ids,
fetch_ai_gateway_models,
fetch_ai_gateway_pricing,
get_pricing_for_provider,
)
def _mock_urlopen(payload):
resp = MagicMock()
resp.read.return_value = json.dumps(payload).encode()
ctx = MagicMock()
ctx.__enter__.return_value = resp
ctx.__exit__.return_value = False
return ctx
def _reset_caches():
models_module._ai_gateway_catalog_cache = None
models_module._pricing_cache.clear()
@pytest.fixture
def config_home(tmp_path, monkeypatch):
home = tmp_path / "hermes"
home.mkdir()
(home / "config.yaml").write_text("model: some-old-model\n")
(home / ".env").write_text("")
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.delenv("AI_GATEWAY_API_KEY", raising=False)
monkeypatch.delenv("AI_GATEWAY_BASE_URL", raising=False)
return home
def test_ai_gateway_provider_is_promoted_near_top_of_picker():
slugs = [entry.slug for entry in CANONICAL_PROVIDERS]
assert "ai-gateway" in slugs[:3]
def test_ai_gateway_pricing_translates_input_output_to_prompt_completion():
_reset_caches()
payload = {
"data": [
{
"id": "moonshotai/kimi-k2.5",
"type": "language",
"pricing": {
"input": "0.0000006",
"output": "0.0000025",
"input_cache_read": "0.00000015",
"input_cache_write": "0.0000006",
},
}
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_pricing(force_refresh=True)
entry = result["moonshotai/kimi-k2.5"]
assert entry["prompt"] == "0.0000006"
assert entry["completion"] == "0.0000025"
assert entry["input_cache_read"] == "0.00000015"
assert entry["input_cache_write"] == "0.0000006"
def test_get_pricing_for_provider_supports_ai_gateway():
_reset_caches()
payload = {
"data": [
{
"id": "moonshotai/kimi-k2.5",
"type": "language",
"pricing": {"input": "0.0001", "output": "0.0002"},
}
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = get_pricing_for_provider("ai-gateway", force_refresh=True)
assert result["moonshotai/kimi-k2.5"] == {"prompt": "0.0001", "completion": "0.0002"}
def test_ai_gateway_pricing_returns_empty_on_fetch_failure():
_reset_caches()
with patch("urllib.request.urlopen", side_effect=OSError("network down")):
result = fetch_ai_gateway_pricing(force_refresh=True)
assert result == {}
def test_ai_gateway_pricing_skips_entries_without_pricing_dict():
_reset_caches()
payload = {
"data": [
{"id": "x/y", "pricing": None},
{"id": "a/b", "pricing": {"input": "0", "output": "0"}},
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_pricing(force_refresh=True)
assert "x/y" not in result
assert result["a/b"] == {"prompt": "0", "completion": "0"}
def test_ai_gateway_free_detector():
assert _ai_gateway_model_is_free({"input": "0", "output": "0"}) is True
assert _ai_gateway_model_is_free({"input": "0", "output": "0.01"}) is False
assert _ai_gateway_model_is_free({"input": "0.01", "output": "0"}) is False
assert _ai_gateway_model_is_free(None) is False
assert _ai_gateway_model_is_free({"input": "not a number"}) is False
def test_fetch_ai_gateway_models_filters_against_live_catalog():
_reset_caches()
preferred = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
live_ids = preferred[:3]
payload = {
"data": [
{"id": mid, "pricing": {"input": "0.001", "output": "0.002"}}
for mid in live_ids
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_models(force_refresh=True)
assert [mid for mid, _ in result] == live_ids
assert result[0][1] == "recommended"
assert ai_gateway_model_ids(force_refresh=False) == live_ids
def test_fetch_ai_gateway_models_tags_free_models():
_reset_caches()
first_id = VERCEL_AI_GATEWAY_MODELS[0][0]
second_id = VERCEL_AI_GATEWAY_MODELS[1][0]
payload = {
"data": [
{"id": first_id, "pricing": {"input": "0.001", "output": "0.002"}},
{"id": second_id, "pricing": {"input": "0", "output": "0"}},
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_models(force_refresh=True)
by_id = dict(result)
assert by_id[first_id] == "recommended"
assert by_id[second_id] == "free"
def test_free_moonshot_model_auto_promoted_to_top_even_if_not_curated():
_reset_caches()
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
unlisted_free_moonshot = "moonshotai/kimi-coder-free-preview"
payload = {
"data": [
{"id": first_curated, "pricing": {"input": "0.001", "output": "0.002"}},
{"id": unlisted_free_moonshot, "pricing": {"input": "0", "output": "0"}},
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_models(force_refresh=True)
assert result[0] == (unlisted_free_moonshot, "recommended")
assert any(mid == first_curated for mid, _ in result)
def test_paid_moonshot_does_not_get_auto_promoted():
_reset_caches()
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
payload = {
"data": [
{"id": first_curated, "pricing": {"input": "0.001", "output": "0.002"}},
{"id": "moonshotai/some-paid-variant", "pricing": {"input": "0.001", "output": "0.002"}},
]
}
with patch("urllib.request.urlopen", return_value=_mock_urlopen(payload)):
result = fetch_ai_gateway_models(force_refresh=True)
assert result[0][0] == first_curated
def test_fetch_ai_gateway_models_falls_back_on_error():
_reset_caches()
with patch("urllib.request.urlopen", side_effect=OSError("network")):
result = fetch_ai_gateway_models(force_refresh=True)
assert result == list(VERCEL_AI_GATEWAY_MODELS)
def test_ai_gateway_setup_flow_shows_deeplink_and_passes_pricing(config_home, monkeypatch, capsys):
from hermes_cli.main import _model_flow_ai_gateway
from hermes_cli.config import load_config
pricing = {"moonshotai/kimi-k2.6": {"prompt": "0", "completion": "0"}}
monkeypatch.setenv("HERMES_HOME", str(config_home))
with patch("getpass.getpass", return_value="vercel-key"), \
patch("hermes_cli.models.ai_gateway_model_ids", return_value=["moonshotai/kimi-k2.6"]), \
patch("hermes_cli.models.get_pricing_for_provider", return_value=pricing), \
patch("hermes_cli.auth._prompt_model_selection", return_value="moonshotai/kimi-k2.6") as prompt_selection, \
patch("hermes_cli.auth.deactivate_provider"):
_model_flow_ai_gateway(load_config(), "")
out = capsys.readouterr().out
assert "vercel.com/d?to=%2F%5Bteam%5D%2F%7E%2Fai-gateway&title=AI+Gateway" in out
assert "free credits" in out.lower()
assert prompt_selection.call_args.kwargs["pricing"] == pricing
import yaml
config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
model = config["model"]
assert model["provider"] == "ai-gateway"
assert model["api_mode"] == "chat_completions"

View File

@@ -0,0 +1,62 @@
"""Attribution default_headers applied per provider via base-URL detection."""
from unittest.mock import MagicMock, patch
from run_agent import AIAgent
@patch("run_agent.OpenAI")
def test_openrouter_base_url_applies_or_headers(mock_openai):
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._apply_client_headers_for_base_url("https://openrouter.ai/api/v1")
headers = agent._client_kwargs["default_headers"]
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
assert headers["X-OpenRouter-Title"] == "Hermes Agent"
@patch("run_agent.OpenAI")
def test_ai_gateway_base_url_applies_attribution_headers(mock_openai):
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._apply_client_headers_for_base_url("https://ai-gateway.vercel.sh/v1")
headers = agent._client_kwargs["default_headers"]
assert headers["HTTP-Referer"] == "https://hermes-agent.nousresearch.com"
assert headers["X-Title"] == "Hermes Agent"
assert headers["User-Agent"].startswith("HermesAgent/")
@patch("run_agent.OpenAI")
def test_unknown_base_url_clears_default_headers(mock_openai):
mock_openai.return_value = MagicMock()
agent = AIAgent(
api_key="test-key",
base_url="https://openrouter.ai/api/v1",
model="test/model",
quiet_mode=True,
skip_context_files=True,
skip_memory=True,
)
agent._client_kwargs["default_headers"] = {"X-Stale": "yes"}
agent._apply_client_headers_for_base_url("https://api.example.com/v1")
assert "default_headers" not in agent._client_kwargs