Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Successful in 42s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Nix / nix (ubuntu-latest) (pull_request) Failing after 4s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 36s
Tests / test (pull_request) Failing after 1h13m6s
Tests / e2e (pull_request) Successful in 1m32s
Nix / nix (macos-latest) (pull_request) Has been cancelled
Adds `repair_and_load_json()` to utils.py, using the `json_repair` library as a fallback when `json.loads()` fails.

Replaces 8 non-hot-path `json.loads` sites identified in issue #809:

- L2250: trajectory/sanitization message content parsing
- L2500: tool_call dict reconstruction in trajectory conversion
- L2535: tool_content parsing (JSON-like strings in tool responses)
- L2888: session log file loading (with warning on unrecoverable parse)
- L3119: todo content parsing in message processing
- L5963: vision result_json parsing
- L6761: memory flush tool call argument parsing
- L8300: cache serialization tool call args normalization

Each site uses an appropriate default (`{}` for tool args, `None`/`continue` for content parsing) and a context label for debug tracing.

Fixes #809
209 lines
7.0 KiB
Python
209 lines
7.0 KiB
Python
"""Shared utility functions for hermes-agent."""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any, Union
|
|
|
|
import yaml
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# Strings that is_truthy_value() accepts as "true"; callers' input is stripped
# and lower-cased before the membership test, so entries here are lower-case.
TRUTHY_STRINGS = frozenset(("1", "true", "yes", "on"))
|
|
|
|
|
|
def is_truthy_value(value: Any, default: bool = False) -> bool:
    """Interpret *value* as a boolean using the shared truthy-string set.

    Args:
        value: Anything bool-ish. ``None`` means "not provided".
        default: Result handed back unchanged when *value* is ``None``.

    Returns:
        *default* for ``None``; for strings, whether the stripped,
        lower-cased text is a member of ``TRUTHY_STRINGS``; for every other
        object, its ordinary ``bool()`` truthiness.
    """
    if value is None:
        outcome = default
    elif isinstance(value, str):
        normalized = value.strip().lower()
        outcome = normalized in TRUTHY_STRINGS
    else:
        # Real bools come back unchanged from bool(), so they need no
        # dedicated branch.
        outcome = bool(value)
    return outcome
|
|
|
|
|
|
def env_var_enabled(name: str, default: str = "") -> bool:
    """Report whether environment variable *name* holds a truthy value.

    Args:
        name: Environment variable to look up.
        default: Text used in place of the variable when it is unset.

    Returns:
        The verdict of ``is_truthy_value`` on the looked-up text.
    """
    raw_setting = os.getenv(name, default)
    return is_truthy_value(raw_setting, default=False)
|
|
|
|
|
|
def atomic_json_write(
    path: Union[str, Path],
    data: Any,
    *,
    indent: int = 2,
    **dump_kwargs: Any,
) -> None:
    """Write JSON data to a file atomically.

    Uses temp file + fsync + os.replace to ensure the target file is never
    left in a partially-written state. If the process crashes mid-write,
    the previous version of the file remains intact.

    Args:
        path: Target file path (will be created or overwritten). Missing
            parent directories are created.
        data: JSON-serializable data to write.
        indent: JSON indentation (default 2).
        **dump_kwargs: Additional keyword args forwarded to json.dump(), such
            as default=str for non-native types. ``ensure_ascii`` defaults to
            False here but may be overridden by the caller.

    Raises:
        Whatever json.dump() or the underlying file operations raise; the
        temp file is removed before the exception propagates.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Apply our ensure_ascii default via setdefault rather than a hard-coded
    # keyword: passing it both explicitly and through **dump_kwargs would
    # raise "got multiple values for keyword argument".
    dump_kwargs.setdefault("ensure_ascii", False)

    # The temp file lives in the target's directory so that os.replace() is a
    # same-directory rename.
    fd, tmp_path = tempfile.mkstemp(
        dir=str(path.parent),
        prefix=f".{path.stem}_",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=indent, **dump_kwargs)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # Intentionally catch BaseException so temp-file cleanup still runs for
        # KeyboardInterrupt/SystemExit before re-raising the original signal.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
|
|
def atomic_yaml_write(
    path: Union[str, Path],
    data: Any,
    *,
    default_flow_style: bool = False,
    sort_keys: bool = False,
    extra_content: str | None = None,
) -> None:
    """Atomically serialize *data* as YAML into *path*.

    The document is first written to a temporary file next to the target,
    flushed and fsynced, and only then moved over the target with
    os.replace, so the target never holds a half-written document.

    Args:
        path: Destination file; it is created or overwritten, and missing
            parent directories are created.
        data: Object handed to yaml.dump().
        default_flow_style: Forwarded to yaml.dump() (default False).
        sort_keys: Forwarded to yaml.dump() (default False).
        extra_content: Optional text written verbatim after the YAML dump
            (e.g. commented-out sections for user reference).
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    handle_fd, scratch_name = tempfile.mkstemp(
        suffix=".tmp",
        prefix=f".{target.stem}_",
        dir=str(target.parent),
    )
    try:
        with os.fdopen(handle_fd, "w", encoding="utf-8") as stream:
            yaml.dump(
                data,
                stream,
                default_flow_style=default_flow_style,
                sort_keys=sort_keys,
            )
            if extra_content:
                stream.write(extra_content)
            stream.flush()
            os.fsync(stream.fileno())
        os.replace(scratch_name, target)
    except BaseException:
        # BaseException on purpose: the scratch file must be removed even on
        # KeyboardInterrupt/SystemExit, after which the signal is re-raised.
        try:
            os.unlink(scratch_name)
        except OSError:
            pass
        raise
|
|
|
|
|
|
# ─── JSON Helpers ─────────────────────────────────────────────────────────────
|
|
|
|
|
|
def safe_json_loads(text: str, default: Any = None) -> Any:
    """Parse JSON, returning *default* on any parse error.

    Replaces the ``try: json.loads(x) except (JSONDecodeError, TypeError)``
    pattern duplicated across display.py, anthropic_adapter.py,
    auxiliary_client.py, and others.

    Args:
        text: The JSON document to parse.
        default: Value returned when *text* cannot be parsed.

    Returns:
        The parsed Python object, or *default* when json.loads() raises
        TypeError, ValueError, or RecursionError.
    """
    try:
        return json.loads(text)
    except (TypeError, ValueError, RecursionError):
        # ValueError already covers json.JSONDecodeError and
        # UnicodeDecodeError (both are subclasses). RecursionError is what
        # the decoder raises for pathologically deep nesting; treat that as
        # an unparseable document too instead of letting it escape.
        return default
|
|
|
|
|
|
def repair_and_load_json(text: str, default: Any = None, *, context: str = "") -> Any:
    """Parse JSON with automatic repair fallback.

    Tries ``json.loads`` first. On failure, attempts to repair the string
    using the ``json_repair`` library before falling back to *default*.
    Logs a debug-level message whenever the repair path is taken so that
    callers can observe silent-failure patterns without raising exceptions.

    ``json_repair`` is imported lazily; if the import fails, the failure is
    logged at debug level and *default* is returned.

    Args:
        text: The JSON string to parse. Non-string input yields *default*.
        default: Value returned when both parse and repair fail.
        context: Optional label included in the debug log (e.g. the call-site
            name) to aid tracing.

    Returns:
        Parsed Python object, or *default* on unrecoverable failure.
    """
    if not isinstance(text, str):
        return default
    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError. RecursionError (raised for
        # pathologically deep nesting) must not escape either: this helper
        # promises *default* rather than an exception.
        pass

    tag = f" [{context}]" if context else ""

    try:
        import json_repair  # optional dependency

        repaired = json_repair.repair_json(text, return_objects=True)
    except Exception as exc:
        # Deliberately broad: a missing library or any repair-time error
        # degrades to *default* instead of propagating to the caller.
        logger.debug("repair_and_load_json%s: repair failed (%s); returning default", tag, exc)
        return default

    # json_repair returns "" when it cannot produce a valid structure.
    # Guard against returning that sentinel as if it were a successful parse.
    # Exception: if the original text was a JSON empty-string literal like `""`
    # then "" is the correct parse result.
    if repaired == "" and text.strip() not in ('""', "''"):
        logger.debug("repair_and_load_json%s: repair yielded empty string; returning default", tag)
        return default

    logger.debug("repair_and_load_json%s: repaired malformed JSON (first 120 chars): %.120s", tag, text)
    return repaired
|
|
|
|
|
|
# ─── Environment Variable Helpers ─────────────────────────────────────────────
|
|
|
|
|
|
def env_int(key: str, default: int = 0) -> int:
    """Return environment variable *key* parsed as an int.

    Args:
        key: Environment variable to read.
        default: Value returned when the variable is unset, blank after
            stripping whitespace, or not parseable by ``int()``.
    """
    text = os.getenv(key, "").strip()
    if text:
        try:
            return int(text)
        except (ValueError, TypeError):
            pass
    return default
|
|
|
|
|
|
def env_bool(key: str, default: bool = False) -> bool:
    """Read an environment variable as a boolean.

    Args:
        key: Environment variable to read.
        default: Value returned when the variable is unset or blank
            (empty or whitespace-only), mirroring ``env_int``.

    Returns:
        *default* for an unset/blank variable; otherwise the verdict of
        ``is_truthy_value`` on the variable's text.
    """
    raw = os.getenv(key, "").strip()
    if not raw:
        # Previously the "" fallback was handed straight to is_truthy_value,
        # which only consults its default for None, so *default* was ignored
        # and an unset variable always read as False.
        return default
    return is_truthy_value(raw, default=default)
|