tests/test_openai_client_lifecycle.py

import sys
import threading
import types
from types import SimpleNamespace

import httpx
import pytest
from openai import APIConnectionError

# Register lightweight stand-in modules before ``import run_agent`` below.
# ``setdefault`` only installs a stub when no module of that name is already
# present in ``sys.modules``, so a real installed-and-imported package wins.
# NOTE(review): presumably run_agent (or something it imports) pulls in
# fire / firecrawl / fal_client at import time — confirm against run_agent.
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())

import run_agent


class FakeRequestClient:
    """Minimal stand-in for an OpenAI client object.

    ``chat.completions.create(**kwargs)`` forwards to the ``responder``
    callable given at construction time and returns (or raises) whatever it
    does.  ``close()`` counts its invocations in ``close_calls`` and flips
    ``_client.is_closed`` to ``True``.
    """

    def __init__(self, responder):
        self.close_calls = 0
        self._responder = responder
        # Inner object exposing only the ``is_closed`` flag.
        self._client = SimpleNamespace(is_closed=False)
        completions_api = SimpleNamespace(create=self._create)
        self.chat = SimpleNamespace(completions=completions_api)
        # Present but empty: no ``responses`` endpoints are faked here.
        self.responses = SimpleNamespace()

    def _create(self, **kwargs):
        """Delegate a ``create`` call to the configured responder."""
        respond = self._responder
        return respond(**kwargs)

    def close(self):
        """Record the close and mark the inner client as closed."""
        self.close_calls += 1
        self._client.is_closed = True


class FakeSharedClient(FakeRequestClient):
    """Fake for the client stored on ``agent.client``.

    Behaves exactly like ``FakeRequestClient``; the separate type only makes
    it obvious in a test which role a given fake is playing.
    """


class OpenAIFactory:
    """Callable replacement for the ``OpenAI`` constructor.

    Hands out the pre-built ``clients`` one per call, in order, and keeps a
    copy of the keyword arguments of every call in ``calls``.  Calling it
    after the queue is empty raises ``AssertionError`` (the attempted call is
    still recorded).
    """

    def __init__(self, clients):
        self.calls = []
        # Copy so the caller's sequence is never mutated.
        self._clients = [*clients]

    def __call__(self, **kwargs):
        self.calls.append({**kwargs})
        if self._clients:
            return self._clients.pop(0)
        raise AssertionError("OpenAI factory exhausted")


def _build_agent(shared_client=None):
    """Create an ``AIAgent`` without running its ``__init__``.

    Only the attributes assigned here are set on the returned instance.

    Args:
        shared_client: Object to install as ``agent.client``.  When omitted
            (or falsy) a ``FakeSharedClient`` answering ``{"shared": True}``
            is used.

    Returns:
        The partially initialised agent.
    """
    agent = run_agent.AIAgent.__new__(run_agent.AIAgent)

    for attr_name, attr_value in (
        ("api_mode", "chat_completions"),
        ("provider", "openai-codex"),
        ("base_url", "https://chatgpt.com/backend-api/codex"),
        ("model", "gpt-5-codex"),
        ("log_prefix", ""),
        ("quiet_mode", True),
        ("_interrupt_requested", False),
        ("_interrupt_message", None),
    ):
        setattr(agent, attr_name, attr_value)

    agent._client_lock = threading.RLock()
    # Read base_url back from the agent so the kwargs mirror what it holds.
    agent._client_kwargs = {"api_key": "***", "base_url": agent.base_url}
    if shared_client:
        agent.client = shared_client
    else:
        agent.client = FakeSharedClient(lambda **kwargs: {"shared": True})

    for callback_attr in ("stream_delta_callback", "_stream_callback", "reasoning_callback"):
        setattr(agent, callback_attr, None)
    return agent


def _connection_error():
    """Return a fresh ``APIConnectionError`` tied to a dummy POST request."""
    dummy_request = httpx.Request("POST", "https://example.com/v1/chat/completions")
    return APIConnectionError(message="Connection error.", request=dummy_request)


def test_retry_after_api_connection_error_recreates_request_client(monkeypatch):
    """A call that fails with ``APIConnectionError`` must not break the next one.

    The first client handed out by the factory raises; the following call is
    expected to obtain the second client from the factory and succeed, and
    both clients must have been closed at least once.
    """

    def fail_with_connection_error(**kwargs):
        raise _connection_error()

    def succeed(**kwargs):
        return {"ok": True}

    failing_client = FakeRequestClient(fail_with_connection_error)
    working_client = FakeRequestClient(succeed)
    openai_factory = OpenAIFactory([failing_client, working_client])
    monkeypatch.setattr(run_agent, "OpenAI", openai_factory)

    agent = _build_agent()

    def new_request():
        # A fresh dict per attempt so nothing is shared between the calls.
        return {"model": agent.model, "messages": []}

    with pytest.raises(APIConnectionError):
        agent._interruptible_api_call(new_request())

    retry_result = agent._interruptible_api_call(new_request())

    assert retry_result == {"ok": True}
    assert len(openai_factory.calls) == 2
    for used_client in (failing_client, working_client):
        assert used_client.close_calls >= 1


def test_closed_shared_client_is_recreated_before_request(monkeypatch):
    """An already-closed ``agent.client`` is swapped out before a request.

    The stale client fails the test if it is ever asked to create a
    completion.  The factory is expected to be called twice: first for the
    replacement shared client, then for the client that serves the request.
    """

    def reject_stale_use(**kwargs):
        raise AssertionError("stale shared client used")

    def replacement_reply(**kwargs):
        return {"replacement": True}

    def request_reply(**kwargs):
        return {"ok": "fresh-request-client"}

    closed_shared = FakeSharedClient(reject_stale_use)
    closed_shared._client.is_closed = True
    new_shared = FakeSharedClient(replacement_reply)
    per_request = FakeRequestClient(request_reply)

    openai_factory = OpenAIFactory([new_shared, per_request])
    monkeypatch.setattr(run_agent, "OpenAI", openai_factory)

    agent = _build_agent(shared_client=closed_shared)
    outcome = agent._interruptible_api_call({"model": agent.model, "messages": []})

    assert outcome == {"ok": "fresh-request-client"}
    assert agent.client is new_shared
    assert closed_shared.close_calls >= 1
    # The replacement must stay open for later use.
    assert new_shared.close_calls == 0
    assert len(openai_factory.calls) == 2


def test_concurrent_requests_do_not_break_each_other_when_one_client_closes(monkeypatch):
    """Two concurrent calls stay independent when one request client closes.

    One fake client closes itself and raises ``APIConnectionError``; the
    other waits until that has happened and then answers normally.  Exactly
    one call must fail with the connection error and exactly one must return
    ``{"ok": "second"}``.

    Which worker thread receives which client depends on scheduling, so the
    assertions look at the collected values rather than at the thread names.
    """
    first_started = threading.Event()
    first_closed = threading.Event()

    def first_responder(**kwargs):
        first_started.set()
        first_client.close()
        first_closed.set()
        raise _connection_error()

    def second_responder(**kwargs):
        # Only answer once the other client has been closed.
        assert first_started.wait(timeout=2)
        assert first_closed.wait(timeout=2)
        return {"ok": "second"}

    first_client = FakeRequestClient(first_responder)
    second_client = FakeRequestClient(second_responder)
    factory = OpenAIFactory([first_client, second_client])
    monkeypatch.setattr(run_agent, "OpenAI", factory)

    agent = _build_agent()
    results = {}

    def run_call(name):
        try:
            results[name] = agent._interruptible_api_call({"model": agent.model, "messages": []})
        except Exception as exc:  # noqa: BLE001 - asserting exact type below
            results[name] = exc

    thread_one = threading.Thread(target=run_call, args=("first",), daemon=True)
    thread_two = threading.Thread(target=run_call, args=("second",), daemon=True)
    thread_one.start()
    thread_two.start()
    thread_one.join(timeout=5)
    thread_two.join(timeout=5)

    # join(timeout=...) returns normally even when the timeout expires, so a
    # hung worker must be detected explicitly; otherwise it would only show
    # up as a confusing count mismatch in the assertions below.
    assert not thread_one.is_alive(), "worker 'first' still running after join timeout"
    assert not thread_two.is_alive(), "worker 'second' still running after join timeout"

    values = list(results.values())
    assert sum(isinstance(value, APIConnectionError) for value in values) == 1
    assert values.count({"ok": "second"}) == 1
    assert len(factory.calls) == 2


def test_streaming_call_recreates_closed_shared_client_before_request(monkeypatch):
    """The streaming path also replaces an already-closed ``agent.client``.

    The request client yields two chunks ("Hello", " world"); the call is
    expected to return a response whose first choice has the message content
    "Hello world", with the stale client closed and the request client closed.
    """

    def make_chunk(text, finish_reason):
        delta = SimpleNamespace(content=text, tool_calls=None)
        choice = SimpleNamespace(delta=delta, finish_reason=finish_reason)
        return SimpleNamespace(model="gpt-5-codex", choices=[choice])

    chunks = iter([make_chunk("Hello", None), make_chunk(" world", "stop")])

    def reject_stale_use(**kwargs):
        raise AssertionError("stale shared client used")

    def replacement_reply(**kwargs):
        return {"replacement": True}

    def stream_reply(**kwargs):
        return chunks

    def ignore_delta(_delta):
        return None

    closed_shared = FakeSharedClient(reject_stale_use)
    closed_shared._client.is_closed = True
    new_shared = FakeSharedClient(replacement_reply)
    streaming_client = FakeRequestClient(stream_reply)

    openai_factory = OpenAIFactory([new_shared, streaming_client])
    monkeypatch.setattr(run_agent, "OpenAI", openai_factory)

    agent = _build_agent(shared_client=closed_shared)
    agent.stream_delta_callback = ignore_delta
    # Force chat_completions mode so the streaming path uses
    # chat.completions.create(stream=True) instead of Codex responses.stream()
    agent.api_mode = "chat_completions"
    streamed = agent._interruptible_streaming_api_call({"model": agent.model, "messages": []})

    assert streamed.choices[0].message.content == "Hello world"
    assert agent.client is new_shared
    assert closed_shared.close_calls >= 1
    assert streaming_client.close_calls >= 1
    assert len(openai_factory.calls) == 2
fix: prevent closed OpenAI client reuse across retries Use per-request OpenAI clients inside _interruptible_api_call so interrupts and transport failures do not poison later retries. Also add closed-client detection/recreation for the shared client and regression tests covering retry and concurrency behavior. 2026-03-14 04:13:05 +00:00			`import sys`
			`import threading`
			`import types`
			`from types import SimpleNamespace`

			`import httpx`
			`import pytest`
			`from openai import APIConnectionError`

			`sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))`
			`sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))`
			`sys.modules.setdefault("fal_client", types.SimpleNamespace())`

			`import run_agent`


			`class FakeRequestClient:`
			`def __init__(self, responder):`
			`self._responder = responder`
			`self._client = SimpleNamespace(is_closed=False)`
			`self.chat = SimpleNamespace(`
			`completions=SimpleNamespace(create=self._create)`
			`)`
			`self.responses = SimpleNamespace()`
			`self.close_calls = 0`

			`def _create(self, **kwargs):`
			`return self._responder(**kwargs)`

			`def close(self):`
			`self.close_calls += 1`
			`self._client.is_closed = True`


			`class FakeSharedClient(FakeRequestClient):`
			`pass`


			`class OpenAIFactory:`
			`def __init__(self, clients):`
			`self._clients = list(clients)`
			`self.calls = []`

			`def __call__(self, **kwargs):`
			`self.calls.append(dict(kwargs))`
			`if not self._clients:`
			`raise AssertionError("OpenAI factory exhausted")`
			`return self._clients.pop(0)`


			`def _build_agent(shared_client=None):`
			`agent = run_agent.AIAgent.__new__(run_agent.AIAgent)`
			`agent.api_mode = "chat_completions"`
			`agent.provider = "openai-codex"`
			`agent.base_url = "https://chatgpt.com/backend-api/codex"`
			`agent.model = "gpt-5-codex"`
			`agent.log_prefix = ""`
			`agent.quiet_mode = True`
			`agent._interrupt_requested = False`
			`agent._interrupt_message = None`
			`agent._client_lock = threading.RLock()`
fix: audit fixes — 5 bugs found and resolved Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/: 1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder was checked BEFORE run_sync populated it. Fixed with async polling pattern (same as track_agent). 2. MEDIUM-HIGH — Streaming fallback after partial delivery caused double-response: if streaming failed after some tokens were delivered, the fallback would re-deliver the full response. Now tracks deltas_were_sent and only falls back when no tokens reached consumers yet. 3. MEDIUM — Codex mode lost on_first_delta spinner callback: _run_codex_stream now accepts on_first_delta parameter, fires it on first text delta. Passed through from _interruptible_streaming_api_call via _codex_on_first_delta instance attribute. 4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a reasoning close tag was sent directly to _emit_stream_text, skipping open-tag detection. Now routes through _stream_delta for full filtering. 5. LOW — Removed 140 lines of dead code: old _streaming_api_call method (superseded by _interruptible_streaming_api_call). Updated 13 tests in test_run_agent.py and test_openai_client_lifecycle.py to use the new method name and signature. 4573 tests passing. 2026-03-16 06:35:46 -07:00			`agent._client_kwargs = {"api_key": "***", "base_url": agent.base_url}`
fix: prevent closed OpenAI client reuse across retries Use per-request OpenAI clients inside _interruptible_api_call so interrupts and transport failures do not poison later retries. Also add closed-client detection/recreation for the shared client and regression tests covering retry and concurrency behavior. 2026-03-14 04:13:05 +00:00			`agent.client = shared_client or FakeSharedClient(lambda **kwargs: {"shared": True})`
fix: audit fixes — 5 bugs found and resolved Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/: 1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder was checked BEFORE run_sync populated it. Fixed with async polling pattern (same as track_agent). 2. MEDIUM-HIGH — Streaming fallback after partial delivery caused double-response: if streaming failed after some tokens were delivered, the fallback would re-deliver the full response. Now tracks deltas_were_sent and only falls back when no tokens reached consumers yet. 3. MEDIUM — Codex mode lost on_first_delta spinner callback: _run_codex_stream now accepts on_first_delta parameter, fires it on first text delta. Passed through from _interruptible_streaming_api_call via _codex_on_first_delta instance attribute. 4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a reasoning close tag was sent directly to _emit_stream_text, skipping open-tag detection. Now routes through _stream_delta for full filtering. 5. LOW — Removed 140 lines of dead code: old _streaming_api_call method (superseded by _interruptible_streaming_api_call). Updated 13 tests in test_run_agent.py and test_openai_client_lifecycle.py to use the new method name and signature. 4573 tests passing. 2026-03-16 06:35:46 -07:00			`agent.stream_delta_callback = None`
			`agent._stream_callback = None`
			`agent.reasoning_callback = None`
fix: prevent closed OpenAI client reuse across retries Use per-request OpenAI clients inside _interruptible_api_call so interrupts and transport failures do not poison later retries. Also add closed-client detection/recreation for the shared client and regression tests covering retry and concurrency behavior. 2026-03-14 04:13:05 +00:00			`return agent`


			`def _connection_error():`
			`return APIConnectionError(`
			`message="Connection error.",`
			`request=httpx.Request("POST", "https://example.com/v1/chat/completions"),`
			`)`


			`def test_retry_after_api_connection_error_recreates_request_client(monkeypatch):`
			`first_request = FakeRequestClient(lambda **kwargs: (_ for _ in ()).throw(_connection_error()))`
			`second_request = FakeRequestClient(lambda **kwargs: {"ok": True})`
			`factory = OpenAIFactory([first_request, second_request])`
			`monkeypatch.setattr(run_agent, "OpenAI", factory)`

			`agent = _build_agent()`

			`with pytest.raises(APIConnectionError):`
			`agent._interruptible_api_call({"model": agent.model, "messages": []})`

			`result = agent._interruptible_api_call({"model": agent.model, "messages": []})`

			`assert result == {"ok": True}`
			`assert len(factory.calls) == 2`
			`assert first_request.close_calls >= 1`
			`assert second_request.close_calls >= 1`


			`def test_closed_shared_client_is_recreated_before_request(monkeypatch):`
			`stale_shared = FakeSharedClient(lambda **kwargs: (_ for _ in ()).throw(AssertionError("stale shared client used")))`
			`stale_shared._client.is_closed = True`

			`replacement_shared = FakeSharedClient(lambda **kwargs: {"replacement": True})`
			`request_client = FakeRequestClient(lambda **kwargs: {"ok": "fresh-request-client"})`
			`factory = OpenAIFactory([replacement_shared, request_client])`
			`monkeypatch.setattr(run_agent, "OpenAI", factory)`

			`agent = _build_agent(shared_client=stale_shared)`
			`result = agent._interruptible_api_call({"model": agent.model, "messages": []})`

			`assert result == {"ok": "fresh-request-client"}`
			`assert agent.client is replacement_shared`
			`assert stale_shared.close_calls >= 1`
			`assert replacement_shared.close_calls == 0`
			`assert len(factory.calls) == 2`


			`def test_concurrent_requests_do_not_break_each_other_when_one_client_closes(monkeypatch):`
			`first_started = threading.Event()`
			`first_closed = threading.Event()`

			`def first_responder(**kwargs):`
			`first_started.set()`
			`first_client.close()`
			`first_closed.set()`
			`raise _connection_error()`

			`def second_responder(**kwargs):`
			`assert first_started.wait(timeout=2)`
			`assert first_closed.wait(timeout=2)`
			`return {"ok": "second"}`

			`first_client = FakeRequestClient(first_responder)`
			`second_client = FakeRequestClient(second_responder)`
			`factory = OpenAIFactory([first_client, second_client])`
			`monkeypatch.setattr(run_agent, "OpenAI", factory)`

			`agent = _build_agent()`
			`results = {}`

			`def run_call(name):`
			`try:`
			`results[name] = agent._interruptible_api_call({"model": agent.model, "messages": []})`
			`except Exception as exc: # noqa: BLE001 - asserting exact type below`
			`results[name] = exc`

			`thread_one = threading.Thread(target=run_call, args=("first",), daemon=True)`
			`thread_two = threading.Thread(target=run_call, args=("second",), daemon=True)`
			`thread_one.start()`
			`thread_two.start()`
			`thread_one.join(timeout=5)`
			`thread_two.join(timeout=5)`

fix(honcho): isolate session routing for multi-user gateway (#1500) Salvaged from PR #1470 by adavyas. Core fix: Honcho tool calls in a multi-session gateway could route to the wrong session because honcho_tools.py relied on process-global state. Now threads session context through the call chain: AIAgent._invoke_tool() → handle_function_call() → registry.dispatch() → handler **kw → _resolve_session_context() Changes: - Add _resolve_session_context() to prefer per-call context over globals - Plumb honcho_manager + honcho_session_key through handle_function_call - Add sync_honcho=False to run_conversation() for synthetic flush turns - Pass honcho_session_key through gateway memory flush lifecycle - Harden gateway PID detection when /proc cmdline is unreadable - Make interrupt test scripts import-safe for pytest-xdist - Wrap BibTeX examples in Jekyll raw blocks for docs build - Fix thread-order-dependent assertion in client lifecycle test - Expand Honcho docs: session isolation, lifecycle, routing internals Dropped from original PR: - Indentation change in _create_request_openai_client that would move client creation inside the lock (causes unnecessary contention) Co-authored-by: adavyas <adavyas@users.noreply.github.com> 2026-03-16 00:23:47 -07:00			`values = list(results.values())`
			`assert sum(isinstance(value, APIConnectionError) for value in values) == 1`
			`assert values.count({"ok": "second"}) == 1`
fix: prevent closed OpenAI client reuse across retries Use per-request OpenAI clients inside _interruptible_api_call so interrupts and transport failures do not poison later retries. Also add closed-client detection/recreation for the shared client and regression tests covering retry and concurrency behavior. 2026-03-14 04:13:05 +00:00			`assert len(factory.calls) == 2`



			`def test_streaming_call_recreates_closed_shared_client_before_request(monkeypatch):`
			`chunks = iter([`
			`SimpleNamespace(`
			`model="gpt-5-codex",`
			`choices=[SimpleNamespace(delta=SimpleNamespace(content="Hello", tool_calls=None), finish_reason=None)],`
			`),`
			`SimpleNamespace(`
			`model="gpt-5-codex",`
			`choices=[SimpleNamespace(delta=SimpleNamespace(content=" world", tool_calls=None), finish_reason="stop")],`
			`),`
			`])`

			`stale_shared = FakeSharedClient(lambda **kwargs: (_ for _ in ()).throw(AssertionError("stale shared client used")))`
			`stale_shared._client.is_closed = True`

			`replacement_shared = FakeSharedClient(lambda **kwargs: {"replacement": True})`
			`request_client = FakeRequestClient(lambda **kwargs: chunks)`
			`factory = OpenAIFactory([replacement_shared, request_client])`
			`monkeypatch.setattr(run_agent, "OpenAI", factory)`

			`agent = _build_agent(shared_client=stale_shared)`
fix: audit fixes — 5 bugs found and resolved Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/: 1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder was checked BEFORE run_sync populated it. Fixed with async polling pattern (same as track_agent). 2. MEDIUM-HIGH — Streaming fallback after partial delivery caused double-response: if streaming failed after some tokens were delivered, the fallback would re-deliver the full response. Now tracks deltas_were_sent and only falls back when no tokens reached consumers yet. 3. MEDIUM — Codex mode lost on_first_delta spinner callback: _run_codex_stream now accepts on_first_delta parameter, fires it on first text delta. Passed through from _interruptible_streaming_api_call via _codex_on_first_delta instance attribute. 4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a reasoning close tag was sent directly to _emit_stream_text, skipping open-tag detection. Now routes through _stream_delta for full filtering. 5. LOW — Removed 140 lines of dead code: old _streaming_api_call method (superseded by _interruptible_streaming_api_call). Updated 13 tests in test_run_agent.py and test_openai_client_lifecycle.py to use the new method name and signature. 4573 tests passing. 2026-03-16 06:35:46 -07:00			`agent.stream_delta_callback = lambda _delta: None`
			`# Force chat_completions mode so the streaming path uses`
			`# chat.completions.create(stream=True) instead of Codex responses.stream()`
			`agent.api_mode = "chat_completions"`
			`response = agent._interruptible_streaming_api_call({"model": agent.model, "messages": []})`
fix: prevent closed OpenAI client reuse across retries Use per-request OpenAI clients inside _interruptible_api_call so interrupts and transport failures do not poison later retries. Also add closed-client detection/recreation for the shared client and regression tests covering retry and concurrency behavior. 2026-03-14 04:13:05 +00:00
			`assert response.choices[0].message.content == "Hello world"`
			`assert agent.client is replacement_shared`
			`assert stale_shared.close_calls >= 1`
			`assert request_client.close_calls >= 1`
			`assert len(factory.calls) == 2`