diff --git a/README.md b/README.md index 57ec3d427..f63bb2f1a 100644 --- a/README.md +++ b/README.md @@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro | Provider | Setup | |----------|-------| -| **Nous Portal** | `hermes login` (OAuth, subscription-based) | +| **Nous Portal** | `hermes model` (OAuth, subscription-based) | +| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) | | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` | | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` | -**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required. + +**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. --- @@ -368,7 +371,7 @@ hermes --resume # Resume a specific session (-r) # Provider & model management hermes model # Switch provider and model interactively -hermes login # Authenticate with Nous Portal (OAuth) +hermes model # Select provider and model hermes logout # Clear stored OAuth credentials # Configuration @@ -1638,7 +1641,7 @@ All variables go in `~/.hermes/.env`. 
Run `hermes config set VAR value` to set t |------|-------------| | `~/.hermes/config.yaml` | Your settings | | `~/.hermes/.env` | API keys and secrets | -| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) | +| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) | | `~/.hermes/cron/` | Scheduled jobs data | | `~/.hermes/sessions/` | Gateway session data | | `~/.hermes/hermes-agent/` | Installation directory | @@ -1666,7 +1669,7 @@ hermes config # View current settings Common issues: - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key` - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH -- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh. +- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh. - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference. - **Gateway won't start**: Check `hermes gateway status` and logs - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 04ac41a56..4fb879414 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -8,7 +8,9 @@ Resolution order for text tasks: 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) - 4. None + 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, + wrapped to look like a chat.completions client) + 5. None Resolution order for vision/multimodal tasks: 1. 
OpenRouter @@ -20,7 +22,8 @@ import json import logging import os from pathlib import Path -from typing import Optional, Tuple +from types import SimpleNamespace +from typing import Any, Dict, List, Optional, Tuple from openai import OpenAI @@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json" +# Codex fallback: uses the Responses API (the only endpoint the Codex +# OAuth token can access) with a fast model for auxiliary tasks. +_CODEX_AUX_MODEL = "gpt-5.3-codex" +_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" + + +# ── Codex Responses → chat.completions adapter ───────────────────────────── +# All auxiliary consumers call client.chat.completions.create(**kwargs) and +# read response.choices[0].message.content. This adapter translates those +# calls to the Codex Responses API so callers don't need any changes. + +class _CodexCompletionsAdapter: + """Drop-in shim that accepts chat.completions.create() kwargs and + routes them through the Codex Responses streaming API.""" + + def __init__(self, real_client: OpenAI, model: str): + self._client = real_client + self._model = model + + def create(self, **kwargs) -> Any: + messages = kwargs.get("messages", []) + model = kwargs.get("model", self._model) + temperature = kwargs.get("temperature") + + # Separate system/instructions from conversation messages + instructions = "You are a helpful assistant." 
+ input_msgs: List[Dict[str, Any]] = [] + for msg in messages: + role = msg.get("role", "user") + content = msg.get("content", "") + if role == "system": + instructions = content + else: + input_msgs.append({"role": role, "content": content}) + + resp_kwargs: Dict[str, Any] = { + "model": model, + "instructions": instructions, + "input": input_msgs or [{"role": "user", "content": ""}], + "stream": True, + "store": False, + } + + max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens") + if max_tokens is not None: + resp_kwargs["max_output_tokens"] = int(max_tokens) + if temperature is not None: + resp_kwargs["temperature"] = temperature + + # Tools support for flush_memories and similar callers + tools = kwargs.get("tools") + if tools: + converted = [] + for t in tools: + fn = t.get("function", {}) if isinstance(t, dict) else {} + name = fn.get("name") + if not name: + continue + converted.append({ + "type": "function", + "name": name, + "description": fn.get("description", ""), + "parameters": fn.get("parameters", {}), + }) + if converted: + resp_kwargs["tools"] = converted + + # Stream and collect the response + text_parts: List[str] = [] + tool_calls_raw: List[Any] = [] + usage = None + + try: + with self._client.responses.stream(**resp_kwargs) as stream: + for _event in stream: + pass + final = stream.get_final_response() + + # Extract text and tool calls from the Responses output + for item in getattr(final, "output", []): + item_type = getattr(item, "type", None) + if item_type == "message": + for part in getattr(item, "content", []): + ptype = getattr(part, "type", None) + if ptype in ("output_text", "text"): + text_parts.append(getattr(part, "text", "")) + elif item_type == "function_call": + tool_calls_raw.append(SimpleNamespace( + id=getattr(item, "call_id", ""), + type="function", + function=SimpleNamespace( + name=getattr(item, "name", ""), + arguments=getattr(item, "arguments", "{}"), + ), + )) + + 
resp_usage = getattr(final, "usage", None) + if resp_usage: + usage = SimpleNamespace( + prompt_tokens=getattr(resp_usage, "input_tokens", 0), + completion_tokens=getattr(resp_usage, "output_tokens", 0), + total_tokens=getattr(resp_usage, "total_tokens", 0), + ) + except Exception as exc: + logger.debug("Codex auxiliary Responses API call failed: %s", exc) + raise + + content = "".join(text_parts).strip() or None + + # Build a response that looks like chat.completions + message = SimpleNamespace( + role="assistant", + content=content, + tool_calls=tool_calls_raw or None, + ) + choice = SimpleNamespace( + index=0, + message=message, + finish_reason="stop" if not tool_calls_raw else "tool_calls", + ) + return SimpleNamespace( + choices=[choice], + model=model, + usage=usage, + ) + + +class _CodexChatShim: + """Wraps the adapter to provide client.chat.completions.create().""" + + def __init__(self, adapter: _CodexCompletionsAdapter): + self.completions = adapter + + +class CodexAuxiliaryClient: + """OpenAI-client-compatible wrapper that routes through Codex Responses API. + + Consumers can call client.chat.completions.create(**kwargs) as normal. + Also exposes .api_key and .base_url for introspection by async wrappers. + """ + + def __init__(self, real_client: OpenAI, model: str): + self._real_client = real_client + adapter = _CodexCompletionsAdapter(real_client, model) + self.chat = _CodexChatShim(adapter) + self.api_key = real_client.api_key + self.base_url = real_client.base_url + + def close(self): + self._real_client.close() + + +class _AsyncCodexCompletionsAdapter: + """Async version of the Codex Responses adapter. + + Wraps the sync adapter via asyncio.to_thread() so async consumers + (web_tools, session_search) can await it as normal. 
+ """ + + def __init__(self, sync_adapter: _CodexCompletionsAdapter): + self._sync = sync_adapter + + async def create(self, **kwargs) -> Any: + import asyncio + return await asyncio.to_thread(self._sync.create, **kwargs) + + +class _AsyncCodexChatShim: + def __init__(self, adapter: _AsyncCodexCompletionsAdapter): + self.completions = adapter + + +class AsyncCodexAuxiliaryClient: + """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().""" + + def __init__(self, sync_wrapper: "CodexAuxiliaryClient"): + sync_adapter = sync_wrapper.chat.completions + async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter) + self.chat = _AsyncCodexChatShim(async_adapter) + self.api_key = sync_wrapper.api_key + self.base_url = sync_wrapper.base_url + def _read_nous_auth() -> Optional[dict]: """Read and validate ~/.hermes/auth.json for an active Nous provider. @@ -82,12 +267,31 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _read_codex_access_token() -> Optional[str]: + """Read a valid Codex OAuth access token from ~/.codex/auth.json.""" + try: + codex_auth = Path.home() / ".codex" / "auth.json" + if not codex_auth.is_file(): + return None + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens") + if not isinstance(tokens, dict): + return None + access_token = tokens.get("access_token") + if isinstance(access_token, str) and access_token.strip(): + return access_token.strip() + return None + except Exception as exc: + logger.debug("Could not read Codex auth for auxiliary client: %s", exc) + return None + + # ── Public API ────────────────────────────────────────────────────────────── def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for text-only auxiliary tasks. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None). + Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None). 
""" # 1. OpenRouter or_key = os.getenv("OPENROUTER_API_KEY") @@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: custom endpoint (%s)", model) return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. Nothing available + # 4. Codex OAuth -- uses the Responses API (only endpoint the token + # can access), wrapped to look like a chat.completions client. + codex_token = _read_codex_access_token() + if codex_token: + logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + + # 5. Nothing available logger.debug("Auxiliary text client: none available") return None, None +def get_async_text_auxiliary_client(): + """Return (async_client, model_slug) for async consumers. + + For standard providers returns (AsyncOpenAI, model). For Codex returns + (AsyncCodexAuxiliaryClient, model) which wraps the Responses API. + Returns (None, None) when no provider is available. + """ + from openai import AsyncOpenAI + + sync_client, model = get_text_auxiliary_client() + if sync_client is None: + return None, None + + if isinstance(sync_client, CodexAuxiliaryClient): + return AsyncCodexAuxiliaryClient(sync_client), model + + async_kwargs = { + "api_key": sync_client.api_key, + "base_url": str(sync_client.base_url), + } + if "openrouter" in str(sync_client.base_url).lower(): + async_kwargs["default_headers"] = dict(_OR_HEADERS) + return AsyncOpenAI(**async_kwargs), model + + def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: """Return (client, model_slug) for vision/multimodal auxiliary tasks. @@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict: OpenRouter and local models use 'max_tokens'. 
Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'. + The Codex adapter translates max_tokens internally, so we use max_tokens + for it as well. """ custom_base = os.getenv("OPENAI_BASE_URL", "") or_key = os.getenv("OPENROUTER_API_KEY") - # Only use max_completion_tokens when the auxiliary client resolved to - # direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com) + # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key and _read_nous_auth() is None and "api.openai.com" in custom_base.lower()): diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 6f9ce3c01..034eb8f99 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -31,7 +31,7 @@ class ContextCompressor: threshold_percent: float = 0.85, protect_first_n: int = 3, protect_last_n: int = 4, - summary_target_tokens: int = 500, + summary_target_tokens: int = 2500, quiet_mode: bool = False, summary_model_override: str = None, ): diff --git a/cli.py b/cli.py index b85edc6b7..16ce554ee 100755 --- a/cli.py +++ b/cli.py @@ -841,12 +841,10 @@ class HermesCLI: or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"]) ) self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY") - # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default - if max_turns != 60: # CLI arg was explicitly set self._nous_key_expires_at: Optional[str] = None self._nous_key_source: Optional[str] = None - # Max turns priority: CLI arg > config file > env var > default - if max_turns is not None: + # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default + if max_turns is not None: # CLI arg was explicitly set self.max_turns = max_turns elif CLI_CONFIG["agent"].get("max_turns"): self.max_turns = CLI_CONFIG["agent"]["max_turns"] diff --git a/docs/cli.md b/docs/cli.md index 
a9257024c..6c1abc399 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -12,7 +12,7 @@ hermes hermes --model "anthropic/claude-sonnet-4" # With specific provider -hermes --provider nous # Use Nous Portal (requires: hermes login) +hermes --provider nous # Use Nous Portal (requires: hermes model) hermes --provider openrouter # Force OpenRouter # With specific toolsets @@ -93,7 +93,7 @@ model: ``` **Provider selection** (`provider` field): -- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars. +- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars. - `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`. - `nous`: Always uses Nous Portal OAuth credentials from `auth.json`. diff --git a/gateway/run.py b/gateway/run.py index 942c72bbc..c21cf8b9e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -214,17 +214,12 @@ class GatewayRunner: return from run_agent import AIAgent - _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "") - _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6") - - if not _flush_api_key: + runtime_kwargs = _resolve_runtime_agent_kwargs() + if not runtime_kwargs.get("api_key"): return tmp_agent = AIAgent( - model=_flush_model, - api_key=_flush_api_key, - base_url=_flush_base_url, + **runtime_kwargs, max_iterations=8, quiet_mode=True, enabled_toolsets=["memory", "skills"], @@ -979,12 +974,10 @@ class GatewayRunner: if old_history: from run_agent import AIAgent loop = asyncio.get_event_loop() - # Resolve credentials so the flush agent can reach the LLM - _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" + _flush_kwargs = _resolve_runtime_agent_kwargs() def _do_flush(): tmp_agent = AIAgent( - 
model=_flush_model, - **_resolve_runtime_agent_kwargs(), + **_flush_kwargs, max_iterations=5, quiet_mode=True, enabled_toolsets=["memory"], diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 864916b32..098b7620c 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -10,7 +10,7 @@ Architecture: - Auth store (auth.json) holds per-provider credential state - resolve_provider() picks the active provider via priority chain - resolve_*_runtime_credentials() handles token refresh and key minting -- login_command() / logout_command() are the CLI entry points +- logout_command() is the CLI entry point for clearing auth """ from __future__ import annotations @@ -127,7 +127,7 @@ def format_auth_error(error: Exception) -> str: return str(error) if error.relogin_required: - return f"{error} Run `hermes login` to re-authenticate." + return f"{error} Run `hermes model` to re-authenticate." if error.code == "subscription_required": return ( @@ -1172,6 +1172,39 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]: return {"logged_in": False} +# ============================================================================= +# External credential detection +# ============================================================================= + +def detect_external_credentials() -> List[Dict[str, Any]]: + """Scan for credentials from other CLI tools that Hermes can reuse. + + Returns a list of dicts, each with: + - provider: str -- Hermes provider id (e.g. 
"openai-codex") + - path: str -- filesystem path where creds were found + - label: str -- human-friendly description for the setup UI + """ + found: List[Dict[str, Any]] = [] + + # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json) + try: + codex_home = resolve_codex_home_path() + codex_auth = codex_home / "auth.json" + if codex_auth.is_file(): + data = json.loads(codex_auth.read_text()) + tokens = data.get("tokens", {}) + if isinstance(tokens, dict) and tokens.get("access_token"): + found.append({ + "provider": "openai-codex", + "path": str(codex_auth), + "label": f"Codex CLI credentials found ({codex_auth})", + }) + except Exception: + pass + + return found + + # ============================================================================= # CLI Commands — login / logout # ============================================================================= @@ -1328,56 +1361,43 @@ def _save_model_choice(model_id: str) -> None: def login_command(args) -> None: - """Run OAuth device code login for the selected provider.""" - provider_id = getattr(args, "provider", None) or "nous" - - if provider_id not in PROVIDER_REGISTRY: - print(f"Unknown provider: {provider_id}") - print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}") - raise SystemExit(1) - - pconfig = PROVIDER_REGISTRY[provider_id] - - if provider_id == "nous": - _login_nous(args, pconfig) - elif provider_id == "openai-codex": - _login_openai_codex(args, pconfig) - else: - print(f"Login for provider '{provider_id}' is not yet implemented.") - raise SystemExit(1) + """Deprecated: use 'hermes model' or 'hermes setup' instead.""" + print("The 'hermes login' command has been removed.") + print("Use 'hermes model' to select a provider and model,") + print("or 'hermes setup' for full interactive setup.") + raise SystemExit(0) def _login_openai_codex(args, pconfig: ProviderConfig) -> None: - """OpenAI Codex login flow using Codex CLI auth state.""" - codex_path = shutil.which("codex") - if not codex_path: - 
print("Codex CLI was not found in PATH.") - print("Install Codex CLI, then retry `hermes login --provider openai-codex`.") - raise SystemExit(1) + """OpenAI Codex login via device code flow (no Codex CLI required).""" + codex_home = resolve_codex_home_path() - print(f"Starting Hermes login via {pconfig.name}...") - print(f"Using Codex CLI: {codex_path}") - print(f"Codex home: {resolve_codex_home_path()}") - - creds: Dict[str, Any] + # Check for existing valid credentials first try: - creds = resolve_codex_runtime_credentials() + existing = resolve_codex_runtime_credentials() + print(f"Existing Codex credentials found at {codex_home / 'auth.json'}") + try: + reuse = input("Use existing credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + reuse = "y" + if reuse in ("", "y", "yes"): + creds = existing + _save_codex_provider_state(creds) + return except AuthError: - print("No usable Codex auth found. Running `codex login`...") - try: - subprocess.run(["codex", "login"], check=True) - except subprocess.CalledProcessError as exc: - print(f"Codex login failed with exit code {exc.returncode}.") - raise SystemExit(1) - except KeyboardInterrupt: - print("\nLogin cancelled.") - raise SystemExit(130) - try: - creds = resolve_codex_runtime_credentials() - except AuthError as exc: - print(format_auth_error(exc)) - raise SystemExit(1) + pass + # No existing creds (or user declined) -- run device code flow + print() + print("Signing in to OpenAI Codex...") + print() + + creds = _codex_device_code_login() + _save_codex_provider_state(creds) + + +def _save_codex_provider_state(creds: Dict[str, Any]) -> None: + """Persist Codex provider state to auth store and config.""" auth_state = { "auth_file": creds.get("auth_file"), "codex_home": creds.get("codex_home"), @@ -1391,13 +1411,170 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "openai-codex", auth_state) saved_to = _save_auth_store(auth_store) - 
config_path = _update_config_for_provider("openai-codex", creds["base_url"]) + config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL)) print() print("Login successful!") print(f" Auth state: {saved_to}") print(f" Config updated: {config_path} (model.provider=openai-codex)") +def _codex_device_code_login() -> Dict[str, Any]: + """Run the OpenAI device code login flow and return credentials dict.""" + import time as _time + + issuer = "https://auth.openai.com" + client_id = CODEX_OAUTH_CLIENT_ID + + # Step 1: Request device code + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + resp = client.post( + f"{issuer}/api/accounts/deviceauth/usercode", + json={"client_id": client_id}, + headers={"Content-Type": "application/json"}, + ) + except Exception as exc: + raise AuthError( + f"Failed to request device code: {exc}", + provider="openai-codex", code="device_code_request_failed", + ) + + if resp.status_code != 200: + raise AuthError( + f"Device code request returned status {resp.status_code}.", + provider="openai-codex", code="device_code_request_error", + ) + + device_data = resp.json() + user_code = device_data.get("user_code", "") + device_auth_id = device_data.get("device_auth_id", "") + poll_interval = max(3, int(device_data.get("interval", "5"))) + + if not user_code or not device_auth_id: + raise AuthError( + "Device code response missing required fields.", + provider="openai-codex", code="device_code_incomplete", + ) + + # Step 2: Show user the code + print("To continue, follow these steps:\n") + print(f" 1. Open this URL in your browser:") + print(f" \033[94m{issuer}/codex/device\033[0m\n") + print(f" 2. Enter this code:") + print(f" \033[94m{user_code}\033[0m\n") + print("Waiting for sign-in... 
(press Ctrl+C to cancel)") + + # Step 3: Poll for authorization code + max_wait = 15 * 60 # 15 minutes + start = _time.monotonic() + code_resp = None + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + while _time.monotonic() - start < max_wait: + _time.sleep(poll_interval) + poll_resp = client.post( + f"{issuer}/api/accounts/deviceauth/token", + json={"device_auth_id": device_auth_id, "user_code": user_code}, + headers={"Content-Type": "application/json"}, + ) + + if poll_resp.status_code == 200: + code_resp = poll_resp.json() + break + elif poll_resp.status_code in (403, 404): + continue # User hasn't completed login yet + else: + raise AuthError( + f"Device auth polling returned status {poll_resp.status_code}.", + provider="openai-codex", code="device_code_poll_error", + ) + except KeyboardInterrupt: + print("\nLogin cancelled.") + raise SystemExit(130) + + if code_resp is None: + raise AuthError( + "Login timed out after 15 minutes.", + provider="openai-codex", code="device_code_timeout", + ) + + # Step 4: Exchange authorization code for tokens + authorization_code = code_resp.get("authorization_code", "") + code_verifier = code_resp.get("code_verifier", "") + redirect_uri = f"{issuer}/deviceauth/callback" + + if not authorization_code or not code_verifier: + raise AuthError( + "Device auth response missing authorization_code or code_verifier.", + provider="openai-codex", code="device_code_incomplete_exchange", + ) + + try: + with httpx.Client(timeout=httpx.Timeout(15.0)) as client: + token_resp = client.post( + CODEX_OAUTH_TOKEN_URL, + data={ + "grant_type": "authorization_code", + "code": authorization_code, + "redirect_uri": redirect_uri, + "client_id": client_id, + "code_verifier": code_verifier, + }, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + ) + except Exception as exc: + raise AuthError( + f"Token exchange failed: {exc}", + provider="openai-codex", code="token_exchange_failed", + ) + + if token_resp.status_code 
!= 200: + raise AuthError( + f"Token exchange returned status {token_resp.status_code}.", + provider="openai-codex", code="token_exchange_error", + ) + + tokens = token_resp.json() + access_token = tokens.get("access_token", "") + refresh_token = tokens.get("refresh_token", "") + + if not access_token: + raise AuthError( + "Token exchange did not return an access_token.", + provider="openai-codex", code="token_exchange_no_access_token", + ) + + # Step 5: Persist tokens to ~/.codex/auth.json + codex_home = resolve_codex_home_path() + codex_home.mkdir(parents=True, exist_ok=True) + auth_path = codex_home / "auth.json" + + payload = { + "tokens": { + "access_token": access_token, + "refresh_token": refresh_token, + }, + "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + } + _persist_codex_auth_payload(auth_path, payload, lock_held=False) + + base_url = ( + os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/") + or DEFAULT_CODEX_BASE_URL + ) + + return { + "api_key": access_token, + "base_url": base_url, + "auth_file": str(auth_path), + "codex_home": str(codex_home), + "last_refresh": payload["last_refresh"], + "auth_mode": "chatgpt", + "source": "device-code", + } + + def _login_nous(args, pconfig: ProviderConfig) -> None: """Nous Portal device authorization flow.""" portal_base_url = ( @@ -1579,6 +1756,6 @@ def logout_command(args) -> None: if os.getenv("OPENROUTER_API_KEY"): print("Hermes will use OpenRouter for inference.") else: - print("Run `hermes login` or configure an API key to use Hermes.") + print("Run `hermes model` or configure an API key to use Hermes.") else: print(f"No auth state found for {provider_name}.") diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index ed1009c54..75559396f 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -1,21 +1,62 @@ -"""Codex model discovery from local Codex CLI cache/config.""" +"""Codex model discovery from API, local cache, and config.""" 
from __future__ import annotations import json +import logging from pathlib import Path from typing import List, Optional from hermes_cli.auth import resolve_codex_home_path +logger = logging.getLogger(__name__) + DEFAULT_CODEX_MODELS: List[str] = [ - "gpt-5-codex", "gpt-5.3-codex", "gpt-5.2-codex", - "gpt-5.1-codex", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", ] +def _fetch_models_from_api(access_token: str) -> List[str]: + """Fetch available models from the Codex API. Returns visible models sorted by priority.""" + try: + import httpx + resp = httpx.get( + "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0", + headers={"Authorization": f"Bearer {access_token}"}, + timeout=10, + ) + if resp.status_code != 200: + return [] + data = resp.json() + entries = data.get("models", []) if isinstance(data, dict) else [] + except Exception as exc: + logger.debug("Failed to fetch Codex models from API: %s", exc) + return [] + + sortable = [] + for item in entries: + if not isinstance(item, dict): + continue + slug = item.get("slug") + if not isinstance(slug, str) or not slug.strip(): + continue + slug = slug.strip() + if item.get("supported_in_api") is False: + continue + visibility = item.get("visibility", "") + if isinstance(visibility, str) and visibility.strip().lower() == "hide": + continue + priority = item.get("priority") + rank = int(priority) if isinstance(priority, (int, float)) else 10_000 + sortable.append((rank, slug)) + + sortable.sort(key=lambda x: (x[0], x[1])) + return [slug for _, slug in sortable] + + def _read_default_model(codex_home: Path) -> Optional[str]: config_path = codex_home / "config.toml" if not config_path.exists(): @@ -72,10 +113,22 @@ def _read_cache_models(codex_home: Path) -> List[str]: return deduped -def get_codex_model_ids() -> List[str]: +def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]: + """Return available Codex model IDs, trying API first, then local sources. 
+ + Resolution order: API (live, if token provided) > config.toml default > + local cache > hardcoded defaults. + """ codex_home = resolve_codex_home_path() ordered: List[str] = [] + # Try live API if we have a token + if access_token: + api_models = _fetch_models_from_api(access_token) + if api_models: + return api_models + + # Fall back to local sources default_model = _read_default_model(codex_home) if default_model: ordered.append(default_model) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index b0965e547..2bc391aad 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -12,7 +12,6 @@ Usage: hermes gateway install # Install gateway service hermes gateway uninstall # Uninstall gateway service hermes setup # Interactive setup wizard - hermes login # Authenticate with Nous Portal (or other providers) hermes logout # Clear stored authentication hermes status # Show status of all components hermes cron # Manage cron jobs @@ -547,7 +546,14 @@ def _model_flow_openai_codex(config, current_model=""): print(f"Login failed: {exc}") return - codex_models = get_codex_model_ids() + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) selected = _prompt_model_selection(codex_models, current_model=current_model) if selected: @@ -827,8 +833,8 @@ def cmd_update(args): pass # No systemd (macOS, WSL1, etc.) 
— skip silently print() - print("Tip: You can now log in with Nous Portal for inference:") - print(" hermes login # Authenticate with Nous Portal") + print("Tip: You can now select a provider and model:") + print(" hermes model # Select provider and model") except subprocess.CalledProcessError as e: print(f"✗ Update failed: {e}") @@ -848,7 +854,6 @@ Examples: hermes --continue Resume the most recent session hermes --resume Resume a specific session hermes setup Run setup wizard - hermes login Authenticate with an inference provider hermes logout Clear stored authentication hermes model Select default model hermes config View configuration diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index d11cb4b7a..fa4dcebb4 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -621,11 +621,23 @@ def run_setup_wizard(args): format_auth_error, AuthError, fetch_nous_models, resolve_nous_runtime_credentials, _update_config_for_provider, _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL, + detect_external_credentials, ) existing_custom = get_env_value("OPENAI_BASE_URL") existing_or = get_env_value("OPENROUTER_API_KEY") active_oauth = get_active_provider() + # Detect credentials from other CLI tools + detected_creds = detect_external_credentials() + if detected_creds: + print_info("Detected existing credentials:") + for cred in detected_creds: + if cred["provider"] == "openai-codex": + print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it") + else: + print_info(f" * {cred['label']}") + print() + # Detect if any provider is already configured has_any_provider = bool(active_oauth or existing_custom or existing_or) @@ -694,11 +706,11 @@ def run_setup_wizard(args): except SystemExit: print_warning("Nous Portal login was cancelled or failed.") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: 
{e}") - print_info("You can try again later with: hermes login") + print_info("You can try again later with: hermes model") selected_provider = None elif provider_idx == 1: # OpenAI Codex @@ -718,11 +730,11 @@ def run_setup_wizard(args): _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) except SystemExit: print_warning("OpenAI Codex login was cancelled or failed.") - print_info("You can try again later with: hermes login --provider openai-codex") + print_info("You can try again later with: hermes model") selected_provider = None except Exception as e: print_error(f"Login failed: {e}") - print_info("You can try again later with: hermes login --provider openai-codex") + print_info("You can try again later with: hermes model") selected_provider = None elif provider_idx == 2: # OpenRouter @@ -834,7 +846,15 @@ def run_setup_wizard(args): # else: keep current elif selected_provider == "openai-codex": from hermes_cli.codex_models import get_codex_model_ids - codex_models = get_codex_model_ids() + # Try to get the access token for live model discovery + _codex_token = None + try: + from hermes_cli.auth import resolve_codex_runtime_credentials + _codex_creds = resolve_codex_runtime_credentials() + _codex_token = _codex_creds.get("api_key") + except Exception: + pass + codex_models = get_codex_model_ids(access_token=_codex_token) model_choices = [f"{m}" for m in codex_models] model_choices.append("Custom model") model_choices.append(f"Keep current ({current_model})") diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 81b55cab7..f1d3a7edf 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -111,7 +111,7 @@ def show_status(args): nous_logged_in = bool(nous_status.get("logged_in")) print( f" {'Nous Portal':<12} {check_mark(nous_logged_in)} " - f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}" + f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}" ) if nous_logged_in: portal_url = 
nous_status.get("portal_base_url") or "(unknown)" @@ -126,7 +126,7 @@ def show_status(args): codex_logged_in = bool(codex_status.get("logged_in")) print( f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} " - f"{'logged in' if codex_logged_in else 'not logged in (run: hermes login --provider openai-codex)'}" + f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}" ) codex_auth_file = codex_status.get("auth_file") if codex_auth_file: diff --git a/run_agent.py b/run_agent.py index ec634b7ab..f025e6b98 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1432,6 +1432,14 @@ class AIAgent: content_text = str(content) if content is not None else "" if role == "assistant": + # Replay encrypted reasoning items from previous turns + # so the API can maintain coherent reasoning chains. + codex_reasoning = msg.get("codex_reasoning_items") + if isinstance(codex_reasoning, list): + for ri in codex_reasoning: + if isinstance(ri, dict) and ri.get("encrypted_content"): + items.append(ri) + if content_text.strip(): items.append({"role": "assistant", "content": content_text}) @@ -1638,7 +1646,10 @@ class AIAgent: if store is not False: raise ValueError("Codex Responses contract requires 'store' to be false.") - allowed_keys = {"model", "instructions", "input", "tools", "store"} + allowed_keys = { + "model", "instructions", "input", "tools", "store", + "reasoning", "include", "max_output_tokens", "temperature", + } normalized: Dict[str, Any] = { "model": model, "instructions": instructions, @@ -1647,6 +1658,22 @@ class AIAgent: "store": False, } + # Pass through reasoning config + reasoning = api_kwargs.get("reasoning") + if isinstance(reasoning, dict): + normalized["reasoning"] = reasoning + include = api_kwargs.get("include") + if isinstance(include, list): + normalized["include"] = include + + # Pass through max_output_tokens and temperature + max_output_tokens = api_kwargs.get("max_output_tokens") + if isinstance(max_output_tokens, (int, float)) and 
max_output_tokens > 0: + normalized["max_output_tokens"] = int(max_output_tokens) + temperature = api_kwargs.get("temperature") + if isinstance(temperature, (int, float)): + normalized["temperature"] = float(temperature) + if allow_stream: stream = api_kwargs.get("stream") if stream is not None and stream is not True: @@ -1719,6 +1746,7 @@ class AIAgent: content_parts: List[str] = [] reasoning_parts: List[str] = [] + reasoning_items_raw: List[Dict[str, Any]] = [] tool_calls: List[Any] = [] has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} saw_commentary_phase = False @@ -1750,6 +1778,16 @@ class AIAgent: reasoning_text = self._extract_responses_reasoning_text(item) if reasoning_text: reasoning_parts.append(reasoning_text) + # Capture the full reasoning item for multi-turn continuity. + # encrypted_content is an opaque blob the API needs back on + # subsequent turns to maintain coherent reasoning chains. + encrypted = getattr(item, "encrypted_content", None) + if isinstance(encrypted, str) and encrypted: + raw_item = {"type": "reasoning", "encrypted_content": encrypted} + item_id = getattr(item, "id", None) + if isinstance(item_id, str) and item_id: + raw_item["id"] = item_id + reasoning_items_raw.append(raw_item) elif item_type == "function_call": if item_status in {"queued", "in_progress", "incomplete"}: continue @@ -1807,6 +1845,7 @@ class AIAgent: reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None, reasoning_content=None, reasoning_details=None, + codex_reasoning_items=reasoning_items_raw or None, ) if tool_calls: @@ -1819,7 +1858,6 @@ class AIAgent: def _run_codex_stream(self, api_kwargs: dict): """Execute one streaming Responses API request and return the final response.""" - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) max_stream_retries = 1 for attempt in range(max_stream_retries + 1): try: @@ -1971,14 +2009,29 @@ class AIAgent: if not instructions: instructions = 
DEFAULT_AGENT_IDENTITY - return { + kwargs = { "model": self.model, "instructions": instructions, "input": self._chat_messages_to_responses_input(payload_messages), "tools": self._responses_tools(), "store": False, + "reasoning": {"effort": "medium", "summary": "auto"}, + "include": ["reasoning.encrypted_content"], } + # Apply reasoning effort from config if set + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + kwargs.pop("reasoning", None) + kwargs["include"] = [] + elif self.reasoning_config.get("effort"): + kwargs["reasoning"]["effort"] = self.reasoning_config["effort"] + + if self.max_tokens is not None: + kwargs["max_output_tokens"] = self.max_tokens + + return kwargs + provider_preferences = {} if self.providers_allowed: provider_preferences["only"] = self.providers_allowed @@ -2045,11 +2098,27 @@ class AIAgent: } if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details: - msg["reasoning_details"] = [ - {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")} - for d in assistant_message.reasoning_details - if isinstance(d, dict) - ] + # Pass reasoning_details back unmodified so providers (OpenRouter, + # Anthropic, OpenAI) can maintain reasoning continuity across turns. + # Each provider may include opaque fields (signature, encrypted_content) + # that must be preserved exactly. + raw_details = assistant_message.reasoning_details + preserved = [] + for d in raw_details: + if isinstance(d, dict): + preserved.append(d) + elif hasattr(d, "__dict__"): + preserved.append(d.__dict__) + elif hasattr(d, "model_dump"): + preserved.append(d.model_dump()) + if preserved: + msg["reasoning_details"] = preserved + + # Codex Responses API: preserve encrypted reasoning items for + # multi-turn continuity. These get replayed as input on the next turn. 
+ codex_items = getattr(assistant_message, "codex_reasoning_items", None) + if codex_items: + msg["codex_reasoning_items"] = codex_items if assistant_message.tool_calls: tool_calls = [] @@ -2152,40 +2221,68 @@ class AIAgent: messages.pop() # remove flush msg return - api_kwargs = { - "model": self.model, - "messages": api_messages, - "tools": [memory_tool_def], - "temperature": 0.3, - **self._max_tokens_param(1024), - } + # Use auxiliary client for the flush call when available -- + # it's cheaper and avoids Codex Responses API incompatibility. + from agent.auxiliary_client import get_text_auxiliary_client + aux_client, aux_model = get_text_auxiliary_client() - response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) + if aux_client: + api_kwargs = { + "model": aux_model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + "max_tokens": 5120, + } + response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0) + elif self.api_mode == "codex_responses": + # No auxiliary client -- use the Codex Responses path directly + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["temperature"] = 0.3 + if "max_output_tokens" in codex_kwargs: + codex_kwargs["max_output_tokens"] = 5120 + response = self._run_codex_stream(codex_kwargs) + else: + api_kwargs = { + "model": self.model, + "messages": api_messages, + "tools": [memory_tool_def], + "temperature": 0.3, + **self._max_tokens_param(5120), + } + response = self.client.chat.completions.create(**api_kwargs, timeout=30.0) - if response.choices: + # Extract tool calls from the response, handling both API formats + tool_calls = [] + if self.api_mode == "codex_responses" and not aux_client: + assistant_msg, _ = self._normalize_codex_response(response) + if assistant_msg and assistant_msg.tool_calls: + tool_calls = assistant_msg.tool_calls + elif hasattr(response, "choices") and 
response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: - # Execute only memory tool calls - for tc in assistant_message.tool_calls: - if tc.function.name == "memory": - try: - args = json.loads(tc.function.arguments) - flush_target = args.get("target", "memory") - from tools.memory_tool import memory_tool as _memory_tool - result = _memory_tool( - action=args.get("action"), - target=flush_target, - content=args.get("content"), - old_text=args.get("old_text"), - store=self._memory_store, - ) - # Also send user observations to Honcho when active - if self._honcho and flush_target == "user" and args.get("action") == "add": - self._honcho_save_user_observation(args.get("content", "")) - if not self.quiet_mode: - print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") - except Exception as e: - logger.debug("Memory flush tool call failed: %s", e) + tool_calls = assistant_message.tool_calls + + for tc in tool_calls: + if tc.function.name == "memory": + try: + args = json.loads(tc.function.arguments) + flush_target = args.get("target", "memory") + from tools.memory_tool import memory_tool as _memory_tool + result = _memory_tool( + action=args.get("action"), + target=flush_target, + content=args.get("content"), + old_text=args.get("old_text"), + store=self._memory_store, + ) + if self._honcho and flush_target == "user" and args.get("action") == "add": + self._honcho_save_user_observation(args.get("content", "")) + if not self.quiet_mode: + print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}") + except Exception as e: + logger.debug("Memory flush tool call failed: %s", e) except Exception as e: logger.debug("Memory flush API call failed: %s", e) finally: @@ -2493,32 +2590,19 @@ class AIAgent: if _is_nous: summary_extra_body["tags"] = ["product=hermes-agent"] - summary_kwargs = { - "model": self.model, - "messages": api_messages, - } - if self.max_tokens is not None: - 
summary_kwargs.update(self._max_tokens_param(self.max_tokens)) - if summary_extra_body: - summary_kwargs["extra_body"] = summary_extra_body - - summary_response = self.client.chat.completions.create(**summary_kwargs) - - if summary_response.choices and summary_response.choices[0].message.content: - final_response = summary_response.choices[0].message.content - if "</think>" in final_response: - final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip() - if final_response: - messages.append({"role": "assistant", "content": final_response}) - else: - final_response = "I reached the iteration limit and couldn't generate a summary." + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + summary_response = self._run_codex_stream(codex_kwargs) + assistant_message, _ = self._normalize_codex_response(summary_response) + final_response = (assistant_message.content or "").strip() if assistant_message else "" else: summary_kwargs = { "model": self.model, "messages": api_messages, } if self.max_tokens is not None: - summary_kwargs["max_tokens"] = self.max_tokens + summary_kwargs.update(self._max_tokens_param(self.max_tokens)) if summary_extra_body: summary_kwargs["extra_body"] = summary_extra_body @@ -2526,6 +2610,42 @@ class AIAgent: if summary_response.choices and summary_response.choices[0].message.content: final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: + if "</think>" in final_response: + final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip() + if final_response: + messages.append({"role": "assistant", "content": final_response}) + else: + final_response = "I reached the iteration limit and couldn't generate a summary."
+ else: + # Retry summary generation + if self.api_mode == "codex_responses": + codex_kwargs = self._build_api_kwargs(api_messages) + codex_kwargs["tools"] = None + retry_response = self._run_codex_stream(codex_kwargs) + retry_msg, _ = self._normalize_codex_response(retry_response) + final_response = (retry_msg.content or "").strip() if retry_msg else "" + else: + summary_kwargs = { + "model": self.model, + "messages": api_messages, + } + if self.max_tokens is not None: + summary_kwargs["max_tokens"] = self.max_tokens + if summary_extra_body: + summary_kwargs["extra_body"] = summary_extra_body + + summary_response = self.client.chat.completions.create(**summary_kwargs) + + if summary_response.choices and summary_response.choices[0].message.content: + final_response = summary_response.choices[0].message.content + else: + final_response = "" + + if final_response: if "</think>" in final_response: final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip() messages.append({"role": "assistant", "content": final_response}) diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py new file mode 100644 index 000000000..efcbce29f --- /dev/null +++ b/tests/agent/test_auxiliary_client.py @@ -0,0 +1,168 @@ +"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback.""" + +import json +import os +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest + +from agent.auxiliary_client import ( + get_text_auxiliary_client, + get_vision_auxiliary_client, + auxiliary_max_tokens_param, + _read_codex_access_token, +) + + +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch): + """Strip provider env vars so each test starts clean.""" + for key in ( + "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", + "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", + ): + monkeypatch.delenv(key, raising=False) + + +@pytest.fixture +def codex_auth_dir(tmp_path, monkeypatch):
"""Provide a writable ~/.codex/ directory with a valid auth.json.""" + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth_file = codex_dir / "auth.json" + auth_file.write_text(json.dumps({ + "tokens": { + "access_token": "codex-test-token-abc123", + "refresh_token": "codex-refresh-xyz", + } + })) + monkeypatch.setattr( + "agent.auxiliary_client._read_codex_access_token", + lambda: "codex-test-token-abc123", + ) + return codex_dir + + +class TestReadCodexAccessToken: + def test_valid_auth_file(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "r-456"} + })) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result == "tok-123" + + def test_missing_file_returns_none(self, tmp_path): + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_empty_token_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({"tokens": {"access_token": " "}})) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_malformed_json_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result is None + + def test_missing_tokens_key_returns_none(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"other": "data"})) + with patch("agent.auxiliary_client.Path.home", return_value=tmp_path): + result = _read_codex_access_token() + assert result 
is None + + +class TestGetTextAuxiliaryClient: + """Test the full resolution chain for get_text_auxiliary_client.""" + + def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + mock_openai.assert_called_once() + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["api_key"] == "or-key" + + def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir): + with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + mock_nous.return_value = {"access_token": "nous-tok"} + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir): + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key") + # Override the autouse monkeypatch for codex + monkeypatch.setattr( + "agent.auxiliary_client._read_codex_access_token", + lambda: "codex-test-token-abc123", + ) + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "gpt-4o-mini" + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_when_nothing_else(self, codex_auth_dir): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, 
CodexAuxiliaryClient) + + def test_returns_none_when_nothing_available(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + client, model = get_text_auxiliary_client() + assert client is None + assert model is None + + +class TestCodexNotInVisionClient: + """Codex fallback should NOT apply to vision tasks.""" + + def test_vision_returns_none_without_openrouter_nous(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None): + client, model = get_vision_auxiliary_client() + assert client is None + assert model is None + + +class TestAuxiliaryMaxTokensParam: + def test_codex_fallback_uses_max_tokens(self, monkeypatch): + """Codex adapter translates max_tokens internally, so we return max_tokens.""" + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"): + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} + + def test_openrouter_uses_max_tokens(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} + + def test_no_provider_uses_max_tokens(self): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + result = auxiliary_max_tokens_param(1024) + assert result == {"max_tokens": 1024} diff --git a/tests/test_auth_codex_provider.py b/tests/test_auth_codex_provider.py index de490754c..7d3076807 100644 --- a/tests/test_auth_codex_provider.py +++ b/tests/test_auth_codex_provider.py @@ -185,8 +185,8 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): _write_codex_auth(codex_home) monkeypatch.setenv("HERMES_HOME", str(hermes_home)) monkeypatch.setenv("CODEX_HOME", str(codex_home)) - 
monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex") - monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None) + # Mock input() to accept existing credentials + monkeypatch.setattr("builtins.input", lambda _: "y") _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"]) @@ -201,19 +201,10 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch): assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL -def test_login_command_defaults_to_nous(monkeypatch): - calls = {"nous": 0, "codex": 0} - - def _fake_nous(args, pconfig): - calls["nous"] += 1 - - def _fake_codex(args, pconfig): - calls["codex"] += 1 - - monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous) - monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex) - - login_command(SimpleNamespace()) - - assert calls["nous"] == 1 - assert calls["codex"] == 0 +def test_login_command_shows_deprecation(monkeypatch, capsys): + """login_command is deprecated and directs users to hermes model.""" + with pytest.raises(SystemExit) as exc_info: + login_command(SimpleNamespace()) + assert exc_info.value.code == 0 + captured = capsys.readouterr() + assert "hermes model" in captured.out diff --git a/tests/test_cli_init.py b/tests/test_cli_init.py new file mode 100644 index 000000000..90ce05c72 --- /dev/null +++ b/tests/test_cli_init.py @@ -0,0 +1,80 @@ +"""Tests for HermesCLI initialization -- catches configuration bugs +that only manifest at runtime (not in mocked unit tests).""" + +import os +import sys +from unittest.mock import patch, MagicMock + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _make_cli(**kwargs): + """Create a HermesCLI instance with minimal mocking.""" + from cli import HermesCLI + with patch("cli.get_tool_definitions", return_value=[]): + return HermesCLI(**kwargs) + + +class TestMaxTurnsResolution: + """max_turns must always resolve to a 
positive integer, never None.""" + + def test_default_max_turns_is_integer(self): + cli = _make_cli() + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_explicit_max_turns_honored(self): + cli = _make_cli(max_turns=25) + assert cli.max_turns == 25 + + def test_none_max_turns_gets_default(self): + cli = _make_cli(max_turns=None) + assert isinstance(cli.max_turns, int) + assert cli.max_turns > 0 + + def test_env_var_max_turns(self, monkeypatch): + """Env var is used when config file doesn't set max_turns.""" + monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42") + import cli as cli_module + original = cli_module.CLI_CONFIG["agent"].get("max_turns") + cli_module.CLI_CONFIG["agent"]["max_turns"] = None + try: + cli_obj = _make_cli() + assert cli_obj.max_turns == 42 + finally: + if original is not None: + cli_module.CLI_CONFIG["agent"]["max_turns"] = original + + def test_max_turns_never_none_for_agent(self): + """The value passed to AIAgent must never be None (causes TypeError in run_conversation).""" + cli = _make_cli() + assert cli.max_turns is not None + + +class TestVerboseAndToolProgress: + def test_default_verbose_is_bool(self): + cli = _make_cli() + assert isinstance(cli.verbose, bool) + + def test_tool_progress_mode_is_string(self): + cli = _make_cli() + assert isinstance(cli.tool_progress_mode, str) + assert cli.tool_progress_mode in ("off", "new", "all", "verbose") + + +class TestProviderResolution: + def test_api_key_is_string_or_none(self): + cli = _make_cli() + assert cli.api_key is None or isinstance(cli.api_key, str) + + def test_base_url_is_string(self): + cli = _make_cli() + assert isinstance(cli.base_url, str) + assert cli.base_url.startswith("http") + + def test_model_is_string(self): + cli = _make_cli() + assert isinstance(cli.model, str) + assert len(cli.model) > 0 diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index 13ce5d7ac..ef24f02b5 100644 --- 
a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -149,6 +149,11 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): runner._prefill_messages = [] runner._reasoning_config = None runner._running_agents = {} + from unittest.mock import MagicMock, AsyncMock + runner.hooks = MagicMock() + runner.hooks.emit = AsyncMock() + runner.hooks.loaded_hooks = [] + runner._session_db = None source = SessionSource( platform=Platform.LOCAL, diff --git a/tests/test_external_credential_detection.py b/tests/test_external_credential_detection.py new file mode 100644 index 000000000..a1fe2a2f9 --- /dev/null +++ b/tests/test_external_credential_detection.py @@ -0,0 +1,51 @@ +"""Tests for detect_external_credentials() -- Phase 2 credential sync.""" + +import json +from pathlib import Path +from unittest.mock import patch + +import pytest + +from hermes_cli.auth import detect_external_credentials + + +class TestDetectCodexCLI: + def test_detects_valid_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + auth = codex_dir / "auth.json" + auth.write_text(json.dumps({ + "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"} + })) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + codex_hits = [c for c in result if c["provider"] == "openai-codex"] + assert len(codex_hits) == 1 + assert "Codex CLI" in codex_hits[0]["label"] + assert str(auth) == codex_hits[0]["path"] + + def test_skips_codex_without_access_token(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}})) + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_missing_codex_dir(self, tmp_path): + with 
patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_skips_malformed_codex_auth(self, tmp_path): + codex_dir = tmp_path / ".codex" + codex_dir.mkdir() + (codex_dir / "auth.json").write_text("{bad json") + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir): + result = detect_external_credentials() + assert not any(c["provider"] == "openai-codex" for c in result) + + def test_returns_empty_when_nothing_found(self, tmp_path): + with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"): + result = detect_external_credentials() + assert result == [] diff --git a/tests/test_flush_memories_codex.py b/tests/test_flush_memories_codex.py new file mode 100644 index 000000000..22eef5ab0 --- /dev/null +++ b/tests/test_flush_memories_codex.py @@ -0,0 +1,225 @@ +"""Tests for flush_memories() working correctly across all provider modes. + +Catches the bug where Codex mode called chat.completions.create on a +Responses-only client, which would fail silently or with a 404. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock, call + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +import run_agent + + +class _FakeOpenAI: + def __init__(self, **kwargs): + self.kwargs = kwargs + self.api_key = kwargs.get("api_key", "test") + self.base_url = kwargs.get("base_url", "http://test") + + def close(self): + pass + + +def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"): + """Build an AIAgent with mocked internals, ready for flush_memories testing.""" + monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [ + { + "type": "function", + "function": { + "name": "memory", + "description": "Manage memories.", + "parameters": { + "type": "object", + "properties": { + "action": {"type": "string"}, + "target": {"type": "string"}, + "content": {"type": "string"}, + }, + }, + }, + }, + ]) + monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {}) + monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI) + + agent = run_agent.AIAgent( + api_key="test-key", + base_url="https://test.example.com/v1", + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + # Give it a valid memory store + agent._memory_store = MagicMock() + agent._memory_flush_min_turns = 1 + agent._user_turn_count = 5 + return agent + + +def _chat_response_with_memory_call(): + """Simulated chat completions response with a memory tool call.""" + return SimpleNamespace( + choices=[SimpleNamespace( + message=SimpleNamespace( + content=None, + tool_calls=[SimpleNamespace( + function=SimpleNamespace( + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + 
"content": "User prefers dark mode.", + }), + ), + )], + ), + )], + usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120), + ) + + +class TestFlushMemoriesUsesAuxiliaryClient: + """When an auxiliary client is available, flush_memories should use it + instead of self.client -- especially critical in Codex mode.""" + + def test_flush_uses_auxiliary_when_available(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Remember this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_aux_client.chat.completions.create.assert_called_once() + call_kwargs = mock_aux_client.chat.completions.create.call_args + assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini" + + def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch): + """Non-Codex mode with no auxiliary falls back to self.client.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + agent.client = MagicMock() + agent.client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + {"role": "user", "content": "Save this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + 
agent.client.chat.completions.create.assert_called_once() + + def test_flush_executes_memory_tool_calls(self, monkeypatch): + """Verify that memory tool calls from the flush response actually get executed.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Note this"}, + ] + with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + agent.flush_memories(messages) + + mock_memory.assert_called_once() + call_kwargs = mock_memory.call_args + assert call_kwargs.kwargs["action"] == "add" + assert call_kwargs.kwargs["target"] == "notes" + assert "dark mode" in call_kwargs.kwargs["content"] + + def test_flush_strips_artifacts_from_messages(self, monkeypatch): + """After flush, the flush prompt and any response should be removed from messages.""" + agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") + + mock_aux_client = MagicMock() + mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call() + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Remember X"}, + ] + original_len = len(messages) + with patch("tools.memory_tool.memory_tool", return_value="Saved."): + agent.flush_memories(messages) + + # Messages should not grow from the flush + assert len(messages) <= original_len + # No flush sentinel should remain + for msg in messages: + assert "_flush_sentinel" not in msg + + +class 
TestFlushMemoriesCodexFallback: + """When no auxiliary client exists and we're in Codex mode, flush should + use the Codex Responses API path instead of chat.completions.""" + + def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch): + agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") + + codex_response = SimpleNamespace( + output=[ + SimpleNamespace( + type="function_call", + call_id="call_1", + name="memory", + arguments=json.dumps({ + "action": "add", + "target": "notes", + "content": "Codex flush test", + }), + ), + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60), + status="completed", + model="gpt-5-codex", + ) + + with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \ + patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \ + patch.object(agent, "_build_api_kwargs") as mock_build, \ + patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: + mock_build.return_value = { + "model": "gpt-5-codex", + "instructions": "test", + "input": [], + "tools": [], + "max_output_tokens": 4096, + } + messages = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi"}, + {"role": "user", "content": "Save this"}, + ] + agent.flush_memories(messages) + + mock_stream.assert_called_once() + mock_memory.assert_called_once() + assert mock_memory.call_args.kwargs["content"] == "Codex flush test" diff --git a/tests/test_provider_parity.py b/tests/test_provider_parity.py new file mode 100644 index 000000000..82199ac4c --- /dev/null +++ b/tests/test_provider_parity.py @@ -0,0 +1,460 @@ +"""Provider parity tests: verify that AIAgent builds correct API kwargs +and handles responses properly for all supported providers. + +Ensures changes to one provider path don't silently break another. 
+""" + +import json +import os +import sys +import types +from types import SimpleNamespace +from unittest.mock import patch, MagicMock + +import pytest + +sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None)) +sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object)) +sys.modules.setdefault("fal_client", types.SimpleNamespace()) + +from run_agent import AIAgent + + +# ── Helpers ────────────────────────────────────────────────────────────────── + +def _tool_defs(*names): + return [ + { + "type": "function", + "function": { + "name": n, + "description": f"{n} tool", + "parameters": {"type": "object", "properties": {}}, + }, + } + for n in names + ] + + +class _FakeOpenAI: + def __init__(self, **kw): + self.api_key = kw.get("api_key", "test") + self.base_url = kw.get("base_url", "http://test") + def close(self): + pass + + +def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): + monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) + monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) + monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) + return AIAgent( + api_key="test-key", + base_url=base_url, + provider=provider, + api_mode=api_mode, + max_iterations=4, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + +# ── _build_api_kwargs tests ───────────────────────────────────────────────── + +class TestBuildApiKwargsOpenRouter: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "model" in kwargs + assert kwargs["messages"][-1]["content"] == "hi" + + def test_includes_reasoning_in_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", 
"content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" in extra + assert extra["reasoning"]["enabled"] is True + + def test_includes_tools(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "tools" in kwargs + tool_names = [t["function"]["name"] for t in kwargs["tools"]] + assert "web_search" in tool_names + + def test_no_responses_api_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" not in kwargs + assert "instructions" not in kwargs + assert "store" not in kwargs + + +class TestBuildApiKwargsNousPortal: + def test_includes_nous_product_tags(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert extra.get("tags") == ["product=hermes-agent"] + + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + +class TestBuildApiKwargsCustomEndpoint: + def test_uses_chat_completions_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "messages" in kwargs + assert "input" not in kwargs + + def test_no_openrouter_extra_body(self, monkeypatch): + agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1") + messages = [{"role": 
"user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + extra = kwargs.get("extra_body", {}) + assert "reasoning" not in extra + + +class TestBuildApiKwargsCodex: + def test_uses_responses_api_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "input" in kwargs + assert "instructions" in kwargs + assert "messages" not in kwargs + assert kwargs["store"] is False + + def test_includes_reasoning_config(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning" in kwargs + assert kwargs["reasoning"]["effort"] == "medium" + + def test_includes_encrypted_content_in_include(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + assert "reasoning.encrypted_content" in kwargs.get("include", []) + + def test_tools_converted_to_responses_format(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hi"}] + kwargs = agent._build_api_kwargs(messages) + tools = kwargs.get("tools", []) + assert len(tools) > 0 + # Responses format has "name" at top level, not nested under "function" + assert "name" in tools[0] + assert "function" not in tools[0] + + +# ── Message conversion tests ──────────────────────────────────────────────── + +class TestChatMessagesToResponsesInput: + """Verify _chat_messages_to_responses_input for Codex 
mode.""" + + def test_user_message_passes_through(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "user", "content": "hello"}] + items = agent._chat_messages_to_responses_input(messages) + assert items == [{"role": "user", "content": "hello"}] + + def test_system_messages_filtered(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "system", "content": "be helpful"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + assert len(items) == 1 + assert items[0]["role"] == "user" + + def test_assistant_tool_calls_become_function_call_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{ + "role": "assistant", + "content": "", + "tool_calls": [{ + "id": "call_abc", + "call_id": "call_abc", + "function": {"name": "web_search", "arguments": '{"query": "test"}'}, + }], + }] + items = agent._chat_messages_to_responses_input(messages) + fc_items = [i for i in items if i.get("type") == "function_call"] + assert len(fc_items) == 1 + assert fc_items[0]["name"] == "web_search" + assert fc_items[0]["call_id"] == "call_abc" + + def test_tool_results_become_function_call_output(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}] + items = agent._chat_messages_to_responses_input(messages) + assert items[0]["type"] == "function_call_output" + assert items[0]["call_id"] == "call_abc" + assert items[0]["output"] == "result here" + + def test_encrypted_reasoning_replayed(self, 
monkeypatch): + """Encrypted reasoning items from previous turns must be included in input.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "user", "content": "think about this"}, + { + "role": "assistant", + "content": "I thought about it.", + "codex_reasoning_items": [ + {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"}, + ], + }, + {"role": "user", "content": "continue"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 1 + assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob" + + def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch): + """Messages without codex_reasoning_items should not inject anything.""" + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + messages = [ + {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "hello"}, + ] + items = agent._chat_messages_to_responses_input(messages) + reasoning_items = [i for i in items if i.get("type") == "reasoning"] + assert len(reasoning_items) == 0 + + +# ── Response normalization tests ───────────────────────────────────────────── + +class TestNormalizeCodexResponse: + """Verify _normalize_codex_response extracts all fields correctly.""" + + def _make_codex_agent(self, monkeypatch): + return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + + def test_text_response(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="Hello!")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = 
agent._normalize_codex_response(response) + assert msg.content == "Hello!" + assert reason == "stop" + + def test_reasoning_summary_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_blob", + summary=[SimpleNamespace(type="summary_text", text="Thinking about math")], + id="rs_123", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="42")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.content == "42" + assert "math" in msg.reasoning + assert reason == "stop" + + def test_encrypted_content_captured(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="reasoning", + encrypted_content="gAAAA_secret_blob_123", + summary=[SimpleNamespace(type="summary_text", text="Thinking")], + id="rs_456", status=None), + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="done")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is not None + assert len(msg.codex_reasoning_items) == 1 + assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123" + assert msg.codex_reasoning_items[0]["id"] == "rs_456" + + def test_no_encrypted_content_when_missing(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="message", status="completed", + content=[SimpleNamespace(type="output_text", text="no reasoning")], + phase="final_answer"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert msg.codex_reasoning_items is None + + def 
test_tool_calls_extracted(self, monkeypatch): + agent = self._make_codex_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace(type="function_call", status="completed", + call_id="call_xyz", name="web_search", + arguments='{"query":"test"}', id="fc_xyz"), + ], + status="completed", + ) + msg, reason = agent._normalize_codex_response(response) + assert reason == "tool_calls" + assert len(msg.tool_calls) == 1 + assert msg.tool_calls[0].function.name == "web_search" + + +# ── Chat completions response handling (OpenRouter/Nous) ───────────────────── + +class TestBuildAssistantMessage: + """Verify _build_assistant_message works for all provider response formats.""" + + def test_openrouter_reasoning_fields(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning="I thought about it", + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert result["content"] == "answer" + assert result["reasoning"] == "I thought about it" + assert "codex_reasoning_items" not in result + + def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch): + """reasoning_details must be passed back exactly as received for + multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this).""" + agent = _make_agent(monkeypatch, "openrouter") + original_detail = { + "type": "thinking", + "thinking": "deep thoughts here", + "signature": "sig123_opaque_blob", + "encrypted_content": "some_provider_blob", + "extra_field": "should_not_be_dropped", + } + msg = SimpleNamespace( + content="answer", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=[original_detail], + ) + result = agent._build_assistant_message(msg, "stop") + stored = result["reasoning_details"][0] + # ALL fields must survive, not just type/text/signature + assert stored["signature"] == "sig123_opaque_blob" + 
assert stored["encrypted_content"] == "some_provider_blob" + assert stored["extra_field"] == "should_not_be_dropped" + assert stored["thinking"] == "deep thoughts here" + + def test_codex_preserves_encrypted_reasoning(self, monkeypatch): + agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses", + base_url="https://chatgpt.com/backend-api/codex") + msg = SimpleNamespace( + content="result", + tool_calls=None, + reasoning="summary text", + reasoning_content=None, + reasoning_details=None, + codex_reasoning_items=[ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ], + ) + result = agent._build_assistant_message(msg, "stop") + assert result["codex_reasoning_items"] == [ + {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"}, + ] + + def test_plain_message_no_codex_items(self, monkeypatch): + agent = _make_agent(monkeypatch, "openrouter") + msg = SimpleNamespace( + content="simple", + tool_calls=None, + reasoning=None, + reasoning_content=None, + reasoning_details=None, + ) + result = agent._build_assistant_message(msg, "stop") + assert "codex_reasoning_items" not in result + + +# ── Auxiliary client provider resolution ───────────────────────────────────── + +class TestAuxiliaryClientProviderPriority: + """Verify auxiliary client resolution doesn't break for any provider.""" + + def test_openrouter_always_wins(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + assert "openrouter" in str(mock.call_args.kwargs["base_url"]).lower() + + def test_nous_when_no_openrouter(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", 
return_value={"access_token": "nous-tok"}), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert model == "gemini-3-flash" + + def test_custom_endpoint_when_no_nous(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + from agent.auxiliary_client import get_text_auxiliary_client + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock: + client, model = get_text_auxiliary_client() + assert mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1" + + def test_codex_fallback_last_resort(self, monkeypatch): + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client() + assert model == "gpt-5.3-codex" + assert isinstance(client, CodexAuxiliaryClient) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index b3d3f552f..a1e5e817e 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -530,12 +530,27 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch): agent = _build_agent(monkeypatch) kwargs = _codex_request_kwargs() - kwargs["temperature"] = 0 + kwargs["some_unknown_field"] = "value" with pytest.raises(ValueError, match="unsupported field"): 
agent._preflight_codex_api_kwargs(kwargs) +def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch): + agent = _build_agent(monkeypatch) + kwargs = _codex_request_kwargs() + kwargs["reasoning"] = {"effort": "high", "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + kwargs["temperature"] = 0.7 + kwargs["max_output_tokens"] = 4096 + + result = agent._preflight_codex_api_kwargs(kwargs) + assert result["reasoning"] == {"effort": "high", "summary": "auto"} + assert result["include"] == ["reasoning.encrypted_content"] + assert result["temperature"] == 0.7 + assert result["max_output_tokens"] == 4096 + + def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch): agent = _build_agent(monkeypatch) responses = [_codex_tool_call_response(), _codex_message_response("done")] diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index bbba7b385..b11b79fda 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional from openai import AsyncOpenAI, OpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client -# Resolve the auxiliary client at import time so we have the model slug. -# We build an AsyncOpenAI from the same credentials for async summarization. 
-_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client() -_async_aux_client: AsyncOpenAI | None = None -if _aux_client is not None: - _async_kwargs = { - "api_key": _aux_client.api_key, - "base_url": str(_aux_client.base_url), - } - if "openrouter" in str(_aux_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "productivity,cli-agent", - } - _async_aux_client = AsyncOpenAI(**_async_kwargs) +# Resolve the async auxiliary client at import time so we have the model slug. +# Handles Codex Responses API adapter transparently. +_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client() MAX_SESSION_CHARS = 100_000 -MAX_SUMMARY_TOKENS = 2000 +MAX_SUMMARY_TOKENS = 10000 def _format_timestamp(ts) -> str: diff --git a/tools/web_tools.py b/tools/web_tools.py index 7ec08fc02..541404e6d 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -48,7 +48,7 @@ import asyncio from typing import List, Dict, Any, Optional from firecrawl import Firecrawl from openai import AsyncOpenAI -from agent.auxiliary_client import get_text_auxiliary_client +from agent.auxiliary_client import get_async_text_auxiliary_client from tools.debug_helpers import DebugSession logger = logging.getLogger(__name__) @@ -67,21 +67,9 @@ def _get_firecrawl_client(): DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 -# Resolve auxiliary text client at module level; build an async wrapper. 
-_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client() -_aux_async_client: AsyncOpenAI | None = None -if _aux_sync_client is not None: - _async_kwargs = { - "api_key": _aux_sync_client.api_key, - "base_url": str(_aux_sync_client.base_url), - } - if "openrouter" in str(_aux_sync_client.base_url).lower(): - _async_kwargs["default_headers"] = { - "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", - "X-OpenRouter-Title": "Hermes Agent", - "X-OpenRouter-Categories": "productivity,cli-agent", - } - _aux_async_client = AsyncOpenAI(**_async_kwargs) +# Resolve async auxiliary client at module level. +# Handles Codex Responses API adapter transparently. +_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") @@ -174,7 +162,7 @@ async def _call_summarizer_llm( content: str, context_str: str, model: str, - max_tokens: int = 4000, + max_tokens: int = 20000, is_chunk: bool = False, chunk_info: str = "" ) -> Optional[str]: @@ -306,7 +294,7 @@ async def _process_large_content_chunked( chunk_content, context_str, model, - max_tokens=2000, + max_tokens=10000, is_chunk=True, chunk_info=chunk_info ) @@ -374,7 +362,7 @@ Create a single, unified markdown summary.""" {"role": "user", "content": synthesis_prompt} ], temperature=0.1, - **auxiliary_max_tokens_param(4000), + **auxiliary_max_tokens_param(20000), **({} if not _extra else {"extra_body": _extra}), ) final_summary = response.choices[0].message.content.strip()