# Changelog context for this test module:
# - Enhanced Codex model discovery by fetching available models from the API,
#   with fallback to local cache and defaults.
# - Updated the context compressor's summary target tokens to 2500 for
#   improved performance.
# - Added external credential detection for Codex CLI to streamline
#   authentication.
# - Refactored various components to ensure consistent handling of
#   authentication and model selection across the application.
"""Tests for flush_memories() working correctly across all provider modes.
|
|
|
|
Catches the bug where Codex mode called chat.completions.create on a
|
|
Responses-only client, which would fail silently or with a 404.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import types
|
|
from types import SimpleNamespace
|
|
from unittest.mock import patch, MagicMock, call
|
|
|
|
import pytest
|
|
|
|
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
|
|
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
|
|
sys.modules.setdefault("fal_client", types.SimpleNamespace())
|
|
|
|
import run_agent
|
|
|
|
|
|
class _FakeOpenAI:
|
|
def __init__(self, **kwargs):
|
|
self.kwargs = kwargs
|
|
self.api_key = kwargs.get("api_key", "test")
|
|
self.base_url = kwargs.get("base_url", "http://test")
|
|
|
|
def close(self):
|
|
pass
|
|
|
|
|
|
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
|
|
"""Build an AIAgent with mocked internals, ready for flush_memories testing."""
|
|
monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
|
|
{
|
|
"type": "function",
|
|
"function": {
|
|
"name": "memory",
|
|
"description": "Manage memories.",
|
|
"parameters": {
|
|
"type": "object",
|
|
"properties": {
|
|
"action": {"type": "string"},
|
|
"target": {"type": "string"},
|
|
"content": {"type": "string"},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
])
|
|
monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
|
|
monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
|
|
|
|
agent = run_agent.AIAgent(
|
|
api_key="test-key",
|
|
base_url="https://test.example.com/v1",
|
|
provider=provider,
|
|
api_mode=api_mode,
|
|
max_iterations=4,
|
|
quiet_mode=True,
|
|
skip_context_files=True,
|
|
skip_memory=True,
|
|
)
|
|
# Give it a valid memory store
|
|
agent._memory_store = MagicMock()
|
|
agent._memory_flush_min_turns = 1
|
|
agent._user_turn_count = 5
|
|
return agent
|
|
|
|
|
|
def _chat_response_with_memory_call():
|
|
"""Simulated chat completions response with a memory tool call."""
|
|
return SimpleNamespace(
|
|
choices=[SimpleNamespace(
|
|
message=SimpleNamespace(
|
|
content=None,
|
|
tool_calls=[SimpleNamespace(
|
|
function=SimpleNamespace(
|
|
name="memory",
|
|
arguments=json.dumps({
|
|
"action": "add",
|
|
"target": "notes",
|
|
"content": "User prefers dark mode.",
|
|
}),
|
|
),
|
|
)],
|
|
),
|
|
)],
|
|
usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
|
|
)
|
|
|
|
|
|
class TestFlushMemoriesUsesAuxiliaryClient:
|
|
"""When an auxiliary client is available, flush_memories should use it
|
|
instead of self.client -- especially critical in Codex mode."""
|
|
|
|
def test_flush_uses_auxiliary_when_available(self, monkeypatch):
|
|
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
|
|
|
|
mock_aux_client = MagicMock()
|
|
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
|
|
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
|
messages = [
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi there"},
|
|
{"role": "user", "content": "Remember this"},
|
|
]
|
|
with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
|
agent.flush_memories(messages)
|
|
|
|
mock_aux_client.chat.completions.create.assert_called_once()
|
|
call_kwargs = mock_aux_client.chat.completions.create.call_args
|
|
assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"
|
|
|
|
def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
|
|
"""Non-Codex mode with no auxiliary falls back to self.client."""
|
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
|
agent.client = MagicMock()
|
|
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
|
|
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
|
|
messages = [
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi there"},
|
|
{"role": "user", "content": "Save this"},
|
|
]
|
|
with patch("tools.memory_tool.memory_tool", return_value="Saved."):
|
|
agent.flush_memories(messages)
|
|
|
|
agent.client.chat.completions.create.assert_called_once()
|
|
|
|
def test_flush_executes_memory_tool_calls(self, monkeypatch):
|
|
"""Verify that memory tool calls from the flush response actually get executed."""
|
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
|
|
|
mock_aux_client = MagicMock()
|
|
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
|
|
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
|
messages = [
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi"},
|
|
{"role": "user", "content": "Note this"},
|
|
]
|
|
with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
|
agent.flush_memories(messages)
|
|
|
|
mock_memory.assert_called_once()
|
|
call_kwargs = mock_memory.call_args
|
|
assert call_kwargs.kwargs["action"] == "add"
|
|
assert call_kwargs.kwargs["target"] == "notes"
|
|
assert "dark mode" in call_kwargs.kwargs["content"]
|
|
|
|
def test_flush_strips_artifacts_from_messages(self, monkeypatch):
|
|
"""After flush, the flush prompt and any response should be removed from messages."""
|
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
|
|
|
mock_aux_client = MagicMock()
|
|
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
|
|
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
|
messages = [
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi"},
|
|
{"role": "user", "content": "Remember X"},
|
|
]
|
|
original_len = len(messages)
|
|
with patch("tools.memory_tool.memory_tool", return_value="Saved."):
|
|
agent.flush_memories(messages)
|
|
|
|
# Messages should not grow from the flush
|
|
assert len(messages) <= original_len
|
|
# No flush sentinel should remain
|
|
for msg in messages:
|
|
assert "_flush_sentinel" not in msg
|
|
|
|
|
|
class TestFlushMemoriesCodexFallback:
|
|
"""When no auxiliary client exists and we're in Codex mode, flush should
|
|
use the Codex Responses API path instead of chat.completions."""
|
|
|
|
def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
|
|
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
|
|
|
|
codex_response = SimpleNamespace(
|
|
output=[
|
|
SimpleNamespace(
|
|
type="function_call",
|
|
call_id="call_1",
|
|
name="memory",
|
|
arguments=json.dumps({
|
|
"action": "add",
|
|
"target": "notes",
|
|
"content": "Codex flush test",
|
|
}),
|
|
),
|
|
],
|
|
usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
|
|
status="completed",
|
|
model="gpt-5-codex",
|
|
)
|
|
|
|
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
|
|
patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
|
|
patch.object(agent, "_build_api_kwargs") as mock_build, \
|
|
patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
|
mock_build.return_value = {
|
|
"model": "gpt-5-codex",
|
|
"instructions": "test",
|
|
"input": [],
|
|
"tools": [],
|
|
"max_output_tokens": 4096,
|
|
}
|
|
messages = [
|
|
{"role": "user", "content": "Hello"},
|
|
{"role": "assistant", "content": "Hi"},
|
|
{"role": "user", "content": "Save this"},
|
|
]
|
|
agent.flush_memories(messages)
|
|
|
|
mock_stream.assert_called_once()
|
|
mock_memory.assert_called_once()
|
|
assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
|