From 5a1e2a307ae429f075c2565b3cdc267691769c0c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Fri, 27 Mar 2026 07:49:44 -0700 Subject: [PATCH] perf(ttft): salvage easy-win startup optimizations from #3346 (#3395) * perf(ttft): dedupe shared tool availability checks * perf(ttft): short-circuit vision auto-resolution * perf(ttft): make Claude Code version detection lazy * perf(ttft): reuse loaded toolsets for skills prompt --------- Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> --- agent/anthropic_adapter.py | 14 +++++++++--- agent/auxiliary_client.py | 8 ++++++- run_agent.py | 15 +++++++++++-- tests/agent/test_auxiliary_client.py | 25 ++++++++++++++++++++++ tests/test_run_agent.py | 32 ++++++++++++++++++++++++++++ tests/tools/test_registry.py | 27 +++++++++++++++++++++++ tools/registry.py | 15 +++++++------ 7 files changed, 124 insertions(+), 12 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 695f4ac97..fb5044137 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -59,6 +59,7 @@ _OAUTH_ONLY_BETAS = [ # The version must stay reasonably current — Anthropic rejects OAuth requests # when the spoofed user-agent version is too far behind the actual release. _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74" +_claude_code_version_cache: Optional[str] = None def _detect_claude_code_version() -> str: @@ -86,11 +87,18 @@ def _detect_claude_code_version() -> str: return _CLAUDE_CODE_VERSION_FALLBACK -_CLAUDE_CODE_VERSION = _detect_claude_code_version() _CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude." _MCP_TOOL_PREFIX = "mcp_" +def _get_claude_code_version() -> str: + """Lazily detect the installed Claude Code version when OAuth headers need it.""" + global _claude_code_version_cache + if _claude_code_version_cache is None: + _claude_code_version_cache = _detect_claude_code_version() + return _claude_code_version_cache + + def _is_oauth_token(key: str) -> bool: """Check if the key is an OAuth/setup token (not a regular Console API key). @@ -132,7 +140,7 @@ def build_anthropic_client(api_key: str, base_url: str = None): kwargs["auth_token"] = api_key kwargs["default_headers"] = { "anthropic-beta": ",".join(all_betas), - "user-agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)", + "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", "x-app": "cli", } else: @@ -241,7 +249,7 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]: headers = { "Content-Type": "application/json", - "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)", + "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", } for endpoint in token_endpoints: diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 7c9763fc0..cbdb6f48b 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1137,7 +1137,13 @@ def resolve_vision_provider_client( return "custom", client, final_model if requested == "auto": - for candidate in get_available_vision_backends(): + ordered = list(_VISION_AUTO_PROVIDER_ORDER) + preferred = _preferred_main_vision_provider() + if preferred in ordered: + ordered.remove(preferred) + ordered.insert(0, preferred) + + for candidate in ordered: sync_client, default_model = _resolve_strict_vision_backend(candidate) if sync_client is not None: return _finalize(candidate, sync_client, default_model) diff --git a/run_agent.py b/run_agent.py index 669b60813..e220fa992 100644 --- a/run_agent.py +++ b/run_agent.py @@ -62,7 +62,12 @@ else: # Import our tool system -from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements +from model_tools import ( + get_tool_definitions, + get_toolset_for_tool, + handle_function_call, + check_toolset_requirements, +) from tools.terminal_tool import cleanup_vm from tools.interrupt import set_interrupt as _set_interrupt from tools.browser_tool import cleanup_browser @@ -2520,7 +2525,13 @@ class AIAgent: has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view', 'skill_manage']) if has_skills_tools: - avail_toolsets = {ts for ts, avail in check_toolset_requirements().items() if avail} + avail_toolsets = { + toolset + for toolset in ( + get_toolset_for_tool(tool_name) for tool_name in self.valid_tool_names + ) + if toolset + } skills_prompt = build_skills_system_prompt( available_tools=self.valid_tool_names, available_toolsets=avail_toolsets, diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 10e14518a..08ed9bc8d 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -11,6 +11,7 @@ from agent.auxiliary_client import ( get_text_auxiliary_client, get_vision_auxiliary_client, get_available_vision_backends, + resolve_vision_provider_client, resolve_provider_client, auxiliary_max_tokens_param, _read_codex_access_token, @@ -638,6 +639,30 @@ class TestVisionClientFallback: assert client.__class__.__name__ == "AnthropicAuxiliaryClient" assert model == "claude-haiku-4-5-20251001" + def test_selected_codex_provider_short_circuits_vision_auto(self, monkeypatch): + def fake_load_config(): + return {"model": {"provider": "openai-codex", "default": "gpt-5.2-codex"}} + + codex_client = MagicMock() + with ( + patch("hermes_cli.config.load_config", fake_load_config), + patch("agent.auxiliary_client._try_codex", return_value=(codex_client, "gpt-5.2-codex")) as mock_codex, + patch("agent.auxiliary_client._try_openrouter") as mock_openrouter, + patch("agent.auxiliary_client._try_nous") as mock_nous, + patch("agent.auxiliary_client._try_anthropic") as mock_anthropic, + patch("agent.auxiliary_client._try_custom_endpoint") as mock_custom, + ): + provider, client, model = resolve_vision_provider_client() + + assert provider == "openai-codex" + assert client is codex_client + assert model == "gpt-5.2-codex" + mock_codex.assert_called_once() + mock_openrouter.assert_not_called() + mock_nous.assert_not_called() + mock_anthropic.assert_not_called() + mock_custom.assert_not_called() + def test_vision_auto_includes_codex(self, codex_auth_dir): """Codex supports vision (gpt-5.3-codex), so auto mode should use it.""" with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index d961244f3..1d60d6db4 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -584,6 +584,38 @@ class TestBuildSystemPrompt: # Should contain current date info like "Conversation started:" assert "Conversation started:" in prompt + def test_skills_prompt_derives_available_toolsets_from_loaded_tools(self): + tools = _make_tool_defs("web_search", "skills_list", "skill_view", "skill_manage") + toolset_map = { + "web_search": "web", + "skills_list": "skills", + "skill_view": "skills", + "skill_manage": "skills", + } + + with ( + patch("run_agent.get_tool_definitions", return_value=tools), + patch( + "run_agent.check_toolset_requirements", + side_effect=AssertionError("should not re-check toolset requirements"), + ), + patch("run_agent.get_toolset_for_tool", create=True, side_effect=toolset_map.get), + patch("run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT") as mock_skills, + patch("run_agent.OpenAI"), + ): + agent = AIAgent( + api_key="test-k...7890", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + + prompt = agent._build_system_prompt() + + assert "SKILLS_PROMPT" in prompt + assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map) + assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} + class TestInvalidateSystemPrompt: def test_clears_cache(self, agent): diff --git a/tests/tools/test_registry.py b/tests/tools/test_registry.py index eac4ab04f..455e9f48a 100644 --- a/tests/tools/test_registry.py +++ b/tests/tools/test_registry.py @@ -81,6 +81,33 @@ class TestGetDefinitions: assert len(defs) == 1 assert defs[0]["function"]["name"] == "available" + def test_reuses_shared_check_fn_once_per_call(self): + reg = ToolRegistry() + calls = {"count": 0} + + def shared_check(): + calls["count"] += 1 + return True + + reg.register( + name="first", + toolset="shared", + schema=_make_schema("first"), + handler=_dummy_handler, + check_fn=shared_check, + ) + reg.register( + name="second", + toolset="shared", + schema=_make_schema("second"), + handler=_dummy_handler, + check_fn=shared_check, + ) + + defs = reg.get_definitions({"first", "second"}) + assert len(defs) == 2 + assert calls["count"] == 1 + class TestUnknownToolDispatch: def test_returns_error_json(self): diff --git a/tools/registry.py b/tools/registry.py index c13d98502..fa1afa03e 100644 --- a/tools/registry.py +++ b/tools/registry.py @@ -98,19 +98,22 @@ class ToolRegistry: are included. """ result = [] + check_results: Dict[Callable, bool] = {} for name in sorted(tool_names): entry = self._tools.get(name) if not entry: continue if entry.check_fn: - try: - if not entry.check_fn(): + if entry.check_fn not in check_results: + try: + check_results[entry.check_fn] = bool(entry.check_fn()) + except Exception: + check_results[entry.check_fn] = False if not quiet: - logger.debug("Tool %s unavailable (check failed)", name) - continue - except Exception: + logger.debug("Tool %s check raised; skipping", name) + if not check_results[entry.check_fn]: if not quiet: - logger.debug("Tool %s check raised; skipping", name) + logger.debug("Tool %s unavailable (check failed)", name) continue result.append({"type": "function", "function": entry.schema}) return result