Compare commits

...

3 Commits

Author SHA1 Message Date
fc0d8fe5e9 fix: extend JSON repair to ALL remaining json.loads sites (#809)
Some checks failed
Nix / nix (macos-latest) (pull_request) Waiting to run
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 2s
Contributor Attribution Check / check-attribution (pull_request) Successful in 26s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 26s
Tests / e2e (pull_request) Successful in 2m50s
Tests / test (pull_request) Failing after 1h17m49s
2026-04-16 02:53:41 +00:00
4752a0085e fix: extend JSON repair to remaining json.loads sites in run_agent.py (#809) 2026-04-16 02:40:51 +00:00
b26a6ec23b feat: add repair_and_load_json() to utils.py (#809) 2026-04-16 02:38:01 +00:00
2 changed files with 88 additions and 12 deletions

View File

@@ -106,7 +106,7 @@ from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
from utils import atomic_json_write, env_var_enabled
from utils import atomic_json_write, env_var_enabled, repair_and_load_json
@@ -277,7 +277,7 @@ def _should_parallelize_tool_batch(tool_calls) -> bool:
for tool_call in tool_calls:
tool_name = tool_call.function.name
try:
function_args = json.loads(tool_call.function.arguments)
function_args = repair_and_load_json(tool_call.function.arguments, default={})
except Exception:
logging.debug(
"Could not parse args for %s — defaulting to sequential; raw=%s",
@@ -2247,7 +2247,7 @@ class AIAgent:
if not isinstance(msg, dict) or msg.get("role") != "tool":
continue
try:
data = json.loads(msg.get("content", "{}"))
data = repair_and_load_json(msg.get("content", "{}"), default={})
except (json.JSONDecodeError, TypeError):
continue
if not data.get("success"):
@@ -2497,7 +2497,7 @@ class AIAgent:
# Parse arguments - should always succeed since we validate during conversation
# but keep try-except as safety net
try:
arguments = json.loads(tool_call["function"]["arguments"]) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
arguments = repair_and_load_json(tool_call["function"]["arguments"], default={}) if isinstance(tool_call["function"]["arguments"], str) else tool_call["function"]["arguments"]
except json.JSONDecodeError:
# This shouldn't happen since we validate and retry during conversation,
# but if it does, log warning and use empty dict
@@ -2532,7 +2532,7 @@ class AIAgent:
tool_content = tool_msg["content"]
try:
if tool_content.strip().startswith(("{", "[")):
tool_content = json.loads(tool_content)
tool_content = repair_and_load_json(tool_content, default=tool_content)
except (json.JSONDecodeError, AttributeError):
pass # Keep as string if not valid JSON
@@ -2885,7 +2885,7 @@ class AIAgent:
# with partial history and would otherwise clobber the full JSON log.
if self.session_log_file.exists():
try:
existing = json.loads(self.session_log_file.read_text(encoding="utf-8"))
existing = repair_and_load_json(self.session_log_file.read_text(encoding="utf-8"), default={})
existing_count = existing.get("message_count", len(existing.get("messages", [])))
if existing_count > len(cleaned):
logging.debug(
@@ -3116,7 +3116,7 @@ class AIAgent:
if '"todos"' not in content:
continue
try:
data = json.loads(content)
data = repair_and_load_json(content, default={})
if "todos" in data and isinstance(data["todos"], list):
last_todo_response = data["todos"]
break
@@ -5960,7 +5960,7 @@ class AIAgent:
result_json = asyncio.run(
vision_analyze_tool(image_url=vision_source, user_prompt=analysis_prompt)
)
result = json.loads(result_json) if isinstance(result_json, str) else {}
result = repair_and_load_json(result_json, default={}) if isinstance(result_json, str) else {}
description = (result.get("analysis") or "").strip()
except Exception as e:
description = f"Image analysis failed: {e}"
@@ -6758,7 +6758,7 @@ class AIAgent:
for tc in tool_calls:
if tc.function.name == "memory":
try:
args = json.loads(tc.function.arguments)
args = repair_and_load_json(tc.function.arguments, default={})
flush_target = args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
_memory_tool(
@@ -7065,7 +7065,7 @@ class AIAgent:
self._iters_since_skill = 0
try:
function_args = json.loads(tool_call.function.arguments)
function_args = repair_and_load_json(tool_call.function.arguments, default={})
except json.JSONDecodeError:
function_args = {}
if not isinstance(function_args, dict):
@@ -7262,7 +7262,7 @@ class AIAgent:
function_name = tool_call.function.name
try:
function_args = json.loads(tool_call.function.arguments)
function_args = repair_and_load_json(tool_call.function.arguments, default={})
except json.JSONDecodeError as e:
logging.warning(f"Unexpected JSON error after validation: {e}")
function_args = {}
@@ -8297,7 +8297,7 @@ class AIAgent:
for tc in tcs:
if isinstance(tc, dict) and "function" in tc:
try:
args_obj = json.loads(tc["function"]["arguments"])
args_obj = repair_and_load_json(tc["function"]["arguments"], default={})
tc = {**tc, "function": {
**tc["function"],
"arguments": json.dumps(

View File

@@ -145,6 +145,82 @@ def safe_json_loads(text: str, default: Any = None) -> Any:
return default
def _iter_json_repair_candidates(text: str):
    """Yield candidate JSON strings derived from *text*, mildest repair first.

    For each plausible JSON region (the span from the first opening bracket to
    the last matching closing bracket, outermost region first, and finally the
    whole text for bare scalars) three variants are produced in order:
    untouched, trailing commas removed, and additionally single quotes turned
    into double quotes. Duplicates are skipped so nothing is parsed twice.
    """
    spans = []
    for opener, closer in (("{", "}"), ("[", "]")):
        start = text.find(opener)
        end = text.rfind(closer)
        if start != -1 and end > start:
            spans.append((start, text[start:end + 1]))
    # The bracket that opens first is the outermost container; trying it first
    # keeps a top-level array of objects from being sliced at its first "{".
    spans.sort(key=lambda item: item[0])
    bases = [span for _, span in spans]
    bases.append(text)

    seen = set()
    for base in bases:
        no_trailing_commas = re.sub(r',\s*([}\]])', r'\1', base)
        # Most aggressive heuristic, kept last: it also rewrites apostrophes
        # inside string values, so it must only run when milder repairs fail.
        requoted = re.sub(r"(?<!\\)'", '"', no_trailing_commas)
        for candidate in (base, no_trailing_commas, requoted):
            if candidate not in seen:
                seen.add(candidate)
                yield candidate


def repair_and_load_json(text: str, default: Any = None, log_warning: bool = False) -> Any:
    """Parse JSON with a repair fallback for malformed LLM output.

    Tries standard ``json.loads`` first. On failure, repairs are attempted
    incrementally, re-parsing after each step so that a later, more
    destructive heuristic never damages input an earlier one already fixed:

    - Strip markdown code fences
    - Find JSON object/array boundaries in surrounding text
    - Strip trailing commas before } or ]
    - Fix single quotes to double quotes

    Repaired candidates are parsed with ``strict=False`` so raw control
    characters (e.g. literal newlines) inside strings are accepted.

    Args:
        text: The JSON string to parse.
        default: Value to return on failure (also returned for empty or
            non-string input).
        log_warning: If True, log successful repairs at debug level and
            unrepairable input at warning level.

    Returns:
        Parsed JSON value, or *default* if unrepairable. Never raises for
        malformed input.
    """
    if not text or not isinstance(text, str):
        return default
    text = text.strip()
    if not text:
        return default

    # Try direct parse first (most common case).
    try:
        return json.loads(text)
    except (TypeError, ValueError):  # json.JSONDecodeError subclasses ValueError
        pass

    # Strip markdown code fences: drop the opening ```/```json line and the
    # closing ``` line when present.
    unfenced = text
    if unfenced.startswith("```"):
        fence_lines = unfenced.split("\n")
        if len(fence_lines) > 2:
            if fence_lines[-1].strip() == "```":
                unfenced = "\n".join(fence_lines[1:-1])
            else:
                unfenced = "\n".join(fence_lines[1:])

    for candidate in _iter_json_repair_candidates(unfenced):
        try:
            result = json.loads(candidate, strict=False)
        except (TypeError, ValueError):
            continue
        if log_warning:
            logging.debug("JSON repaired (%d -> %d chars)", len(text), len(candidate))
        return result

    if log_warning:
        logging.warning("JSON repair failed, using default. Input: %.100s...", text)
    return default
# ─── Environment Variable Helpers ─────────────────────────────────────────────