diff --git a/run_agent.py b/run_agent.py index d1d3d27e5..7a0682385 100644 --- a/run_agent.py +++ b/run_agent.py @@ -21,6 +21,7 @@ Usage: """ import copy +import hashlib import json import logging logger = logging.getLogger(__name__) @@ -482,6 +483,54 @@ class AIAgent: if not content: return "" return re.sub(r'.*?', '', content, flags=re.DOTALL) + + def _looks_like_codex_intermediate_ack( + self, + user_message: str, + assistant_content: str, + messages: List[Dict[str, Any]], + ) -> bool: + """Detect a planning/ack message that should continue instead of ending the turn.""" + if any(isinstance(msg, dict) and msg.get("role") == "tool" for msg in messages): + return False + + assistant_text = self._strip_think_blocks(assistant_content or "").strip().lower() + if not assistant_text: + return False + if len(assistant_text) > 1200: + return False + + has_future_ack = bool( + re.search(r"\b(i['’]ll|i will|let me|i can do that|i can help with that)\b", assistant_text) + ) + if not has_future_ack: + return False + + action_markers = ( + "look into", + "inspect", + "scan", + "check", + "analyz", + "review", + "explore", + "read", + "open", + "run", + "test", + "fix", + "debug", + "search", + "find", + "walkthrough", + "report back", + "summarize", + ) + + user_text = (user_message or "").strip().lower() + user_requests_action = any(marker in user_text for marker in action_markers) or "~/" in user_text or "/" in user_text + assistant_mentions_action = any(marker in assistant_text for marker in action_markers) + return user_requests_action and assistant_mentions_action def _extract_reasoning(self, assistant_message) -> Optional[str]: @@ -1154,10 +1203,57 @@ class AIAgent: "type": "function", "name": name, "description": fn.get("description", ""), + "strict": False, "parameters": fn.get("parameters", {"type": "object", "properties": {}}), }) return converted or None + @staticmethod + def _split_responses_tool_id(raw_id: Any) -> tuple[Optional[str], Optional[str]]: + """Split a stored tool id into (call_id, response_item_id).""" + if not isinstance(raw_id, str): + return None, None + value = raw_id.strip() + if not value: + return None, None + if "|" in value: + call_id, response_item_id = value.split("|", 1) + call_id = call_id.strip() or None + response_item_id = response_item_id.strip() or None + return call_id, response_item_id + if value.startswith("fc_"): + return None, value + return value, None + + def _derive_responses_function_call_id( + self, + call_id: str, + response_item_id: Optional[str] = None, + ) -> str: + """Build a valid Responses `function_call.id` (must start with `fc_`).""" + if isinstance(response_item_id, str): + candidate = response_item_id.strip() + if candidate.startswith("fc_"): + return candidate + + source = (call_id or "").strip() + if source.startswith("fc_"): + return source + if source.startswith("call_") and len(source) > len("call_"): + return f"fc_{source[len('call_'):]}" + + sanitized = re.sub(r"[^A-Za-z0-9_-]", "", source) + if sanitized.startswith("fc_"): + return sanitized + if sanitized.startswith("call_") and len(sanitized) > len("call_"): + return f"fc_{sanitized[len('call_'):]}" + if sanitized: + return f"fc_{sanitized[:48]}" + + seed = source or str(response_item_id or "") or uuid.uuid4().hex + digest = hashlib.sha1(seed.encode("utf-8")).hexdigest()[:24] + return f"fc_{digest}" + def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Convert internal chat-style messages to Responses input items.""" items: List[Dict[str, Any]] = [] @@ -1187,9 +1283,32 @@ class AIAgent: if not isinstance(fn_name, str) or not fn_name.strip(): continue - call_id = tc.get("id") or tc.get("call_id") + embedded_call_id, embedded_response_item_id = self._split_responses_tool_id( + tc.get("id") + ) + call_id = tc.get("call_id") if not isinstance(call_id, str) or not call_id.strip(): - call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if ( + isinstance(embedded_response_item_id, str) + and embedded_response_item_id.startswith("fc_") + and len(embedded_response_item_id) > len("fc_") + ): + call_id = f"call_{embedded_response_item_id[len('fc_'):]}" + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = tc.get("response_item_id") + if not isinstance(response_item_id, str) or not response_item_id.strip(): + response_item_id = tc.get("responses_item_id") + if not isinstance(response_item_id, str) or not response_item_id.strip(): + response_item_id = embedded_response_item_id + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) arguments = fn.get("arguments", "{}") if isinstance(arguments, dict): @@ -1200,7 +1319,7 @@ class AIAgent: items.append({ "type": "function_call", - "id": call_id, + "id": response_item_id, "call_id": call_id, "name": fn_name, "arguments": arguments, @@ -1211,7 +1330,11 @@ class AIAgent: continue if role == "tool": - call_id = msg.get("tool_call_id") + raw_tool_call_id = msg.get("tool_call_id") + call_id, _ = self._split_responses_tool_id(raw_tool_call_id) + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_tool_call_id, str) and raw_tool_call_id.strip(): + call_id = raw_tool_call_id.strip() if not isinstance(call_id, str) or not call_id.strip(): continue items.append({ @@ -1278,6 +1401,8 @@ class AIAgent: reasoning_parts: List[str] = [] tool_calls: List[Any] = [] has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"} + saw_commentary_phase = False + saw_final_answer_phase = False for item in output: item_type = getattr(item, "type", None) @@ -1291,6 +1416,13 @@ class AIAgent: has_incomplete_items = True if item_type == "message": + item_phase = getattr(item, "phase", None) + if isinstance(item_phase, str): + normalized_phase = item_phase.strip().lower() + if normalized_phase in {"commentary", "analysis"}: + saw_commentary_phase = True + elif normalized_phase in {"final_answer", "final"}: + saw_final_answer_phase = True message_text = self._extract_responses_message_text(item) if message_text: content_parts.append(message_text) @@ -1305,9 +1437,19 @@ class AIAgent: arguments = getattr(item, "arguments", "{}") if not isinstance(arguments, str): arguments = str(arguments) - call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) tool_calls.append(SimpleNamespace( id=call_id, + call_id=call_id, + response_item_id=response_item_id, type="function", function=SimpleNamespace(name=fn_name, arguments=arguments), )) @@ -1316,9 +1458,19 @@ class AIAgent: arguments = getattr(item, "input", "{}") if not isinstance(arguments, str): arguments = str(arguments) - call_id = getattr(item, "call_id", None) or getattr(item, "id", None) or f"call_{uuid.uuid4().hex[:12]}" + raw_call_id = getattr(item, "call_id", None) + raw_item_id = getattr(item, "id", None) + embedded_call_id, _ = self._split_responses_tool_id(raw_item_id) + call_id = raw_call_id if isinstance(raw_call_id, str) and raw_call_id.strip() else embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + response_item_id = raw_item_id if isinstance(raw_item_id, str) else None + response_item_id = self._derive_responses_function_call_id(call_id, response_item_id) tool_calls.append(SimpleNamespace( id=call_id, + call_id=call_id, + response_item_id=response_item_id, type="function", function=SimpleNamespace(name=fn_name, arguments=arguments), )) @@ -1339,7 +1491,7 @@ class AIAgent: if tool_calls: finish_reason = "tool_calls" - elif has_incomplete_items: + elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" else: finish_reason = "stop" @@ -1484,17 +1636,42 @@ class AIAgent: ] if assistant_message.tool_calls: - msg["tool_calls"] = [ - { - "id": tool_call.id, + tool_calls = [] + for tool_call in assistant_message.tool_calls: + raw_id = getattr(tool_call, "id", None) + call_id = getattr(tool_call, "call_id", None) + if not isinstance(call_id, str) or not call_id.strip(): + embedded_call_id, _ = self._split_responses_tool_id(raw_id) + call_id = embedded_call_id + if not isinstance(call_id, str) or not call_id.strip(): + if isinstance(raw_id, str) and raw_id.strip(): + call_id = raw_id.strip() + else: + call_id = f"call_{uuid.uuid4().hex[:12]}" + call_id = call_id.strip() + + response_item_id = getattr(tool_call, "response_item_id", None) + if not isinstance(response_item_id, str) or not response_item_id.strip(): + _, embedded_response_item_id = self._split_responses_tool_id(raw_id) + response_item_id = embedded_response_item_id + + response_item_id = self._derive_responses_function_call_id( + call_id, + response_item_id if isinstance(response_item_id, str) else None, + ) + + tool_calls.append({ + "id": call_id, + "call_id": call_id, + "response_item_id": response_item_id, "type": tool_call.type, "function": { "name": tool_call.function.name, "arguments": tool_call.function.arguments - } + }, } - for tool_call in assistant_message.tool_calls - ] + ) + msg["tool_calls"] = tool_calls return msg @@ -2021,6 +2198,7 @@ class AIAgent: api_call_count = 0 final_response = None interrupted = False + codex_ack_continuations = 0 # Clear any stale interrupt state at start self.clear_interrupt() @@ -2742,6 +2920,36 @@ class AIAgent: # Reset retry counter on successful content if hasattr(self, '_empty_content_retries'): self._empty_content_retries = 0 + + if ( + self.api_mode == "codex_responses" + and self.valid_tool_names + and codex_ack_continuations < 2 + and self._looks_like_codex_intermediate_ack( + user_message=user_message, + assistant_content=final_response, + messages=messages, + ) + ): + codex_ack_continuations += 1 + interim_msg = self._build_assistant_message(assistant_message, "incomplete") + messages.append(interim_msg) + self._log_msg_to_db(interim_msg) + + continue_msg = { + "role": "user", + "content": ( + "[System: Continue now. Execute the required tool calls and only " + "send your final answer after completing the task.]" + ), + } + messages.append(continue_msg) + self._log_msg_to_db(continue_msg) + self._session_messages = messages + self._save_session_log(messages) + continue + + codex_ack_continuations = 0 final_msg = self._build_assistant_message(assistant_message, finish_reason) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 846d9c1c0..27723bd67 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -66,7 +66,7 @@ def _codex_tool_call_response(): output=[ SimpleNamespace( type="function_call", - id="call_1", + id="fc_1", call_id="call_1", name="terminal", arguments="{}", @@ -93,6 +93,37 @@ def _codex_incomplete_message_response(text: str): ) +def _codex_commentary_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + phase="commentary", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + +def _codex_ack_message_response(text: str): + return SimpleNamespace( + output=[ + SimpleNamespace( + type="message", + status="completed", + content=[SimpleNamespace(type="output_text", text=text)], + ) + ], + usage=SimpleNamespace(input_tokens=4, output_tokens=2, total_tokens=6), + status="completed", + model="gpt-5-codex", + ) + + def test_api_mode_uses_explicit_provider_when_codex(monkeypatch): _patch_agent_bootstrap(monkeypatch) agent = run_agent.AIAgent( @@ -157,6 +188,7 @@ def test_build_api_kwargs_codex(monkeypatch): assert kwargs["input"][0]["role"] == "user" assert kwargs["tools"][0]["type"] == "function" assert kwargs["tools"][0]["name"] == "terminal" + assert kwargs["tools"][0]["strict"] is False assert "function" not in kwargs["tools"][0] @@ -197,6 +229,99 @@ def test_run_conversation_codex_tool_round_trip(monkeypatch): assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) +def test_chat_messages_to_responses_input_uses_fc_id_for_function_call(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_abc123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_abc123" + assert function_call["id"] == "fc_abc123" + assert function_output["call_id"] == "call_abc123" + + +def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch): + agent = _build_agent(monkeypatch) + items = agent._chat_messages_to_responses_input( + [ + {"role": "user", "content": "Run terminal"}, + { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_pair123|fc_pair123", + "type": "function", + "function": {"name": "terminal", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_pair123|fc_pair123", "content": '{"ok":true}'}, + ] + ) + + function_call = next(item for item in items if item.get("type") == "function_call") + function_output = next(item for item in items if item.get("type") == "function_call_output") + + assert function_call["call_id"] == "call_pair123" + assert function_call["id"] == "fc_pair123" + assert function_output["call_id"] == "call_pair123" + + +def test_run_conversation_codex_replay_payload_keeps_call_id_and_fc_id(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [_codex_tool_call_response(), _codex_message_response("done")] + requests = [] + + def _fake_api_call(api_kwargs): + requests.append(api_kwargs) + return responses.pop(0) + + monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("run a command") + + assert result["completed"] is True + assert result["final_response"] == "done" + assert len(requests) >= 2 + + replay_input = requests[1]["input"] + function_call = next(item for item in replay_input if item.get("type") == "function_call") + function_output = next(item for item in replay_input if item.get("type") == "function_call_output") + assert function_call["call_id"] == "call_1" + assert function_call["id"] == "fc_1" + assert function_output["call_id"] == "call_1" + + def test_run_conversation_codex_continues_after_incomplete_interim_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [ @@ -229,3 +354,88 @@ def test_run_conversation_codex_continues_after_incomplete_interim_message(monke for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_normalize_codex_response_marks_commentary_only_message_as_incomplete(monkeypatch): + agent = _build_agent(monkeypatch) + assistant_message, finish_reason = agent._normalize_codex_response( + _codex_commentary_message_response("I'll inspect the repository first.") + ) + + assert finish_reason == "incomplete" + assert "inspect the repository" in (assistant_message.content or "") + + +def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_commentary_message_response("I'll inspect the repo structure first."), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("analyze repo") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect the repo structure" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_run_conversation_codex_continues_after_ack_stop_message(monkeypatch): + agent = _build_agent(monkeypatch) + responses = [ + _codex_ack_message_response( + "Absolutely — I can do that. I'll inspect ~/openclaw-studio and report back with a walkthrough." + ), + _codex_tool_call_response(), + _codex_message_response("Architecture summary complete."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + def _fake_execute_tool_calls(assistant_message, messages, effective_task_id): + for call in assistant_message.tool_calls: + messages.append( + { + "role": "tool", + "tool_call_id": call.id, + "content": '{"ok":true}', + } + ) + + monkeypatch.setattr(agent, "_execute_tool_calls", _fake_execute_tool_calls) + + result = agent.run_conversation("look into ~/openclaw-studio and tell me how it works") + + assert result["completed"] is True + assert result["final_response"] == "Architecture summary complete." + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and "inspect ~/openclaw-studio" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any( + msg.get("role") == "user" + and "Continue now. Execute the required tool calls" in (msg.get("content") or "") + for msg in result["messages"] + ) + assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"])