refactor: extract atomic_json_write helper, add 24 checkpoint tests
Extract the duplicated temp-file + fsync + os.replace pattern from
batch_runner.py (1 instance) and process_registry.py (2 instances) into
a shared utils.atomic_json_write() function.
Add 12 tests for atomic_json_write covering: valid JSON, parent dir
creation, overwrite, crash safety (original preserved on error), no temp
file leaks, string paths, unicode, custom indent, concurrent writes.
Add 12 tests for batch_runner checkpoint behavior covering:
_save_checkpoint (valid JSON, last_updated, overwrite, lock/no-lock,
parent dirs, no temp leaks), _load_checkpoint (missing file, existing
data, corrupt JSON), and resume logic (preserves prior progress,
different run_name starts fresh).
2026-03-06 05:50:12 -08:00
|
|
|
"""Shared utility functions for hermes-agent."""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import tempfile
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any, Union
|
|
|
|
|
|
2026-03-08 18:55:09 +03:30
|
|
|
import yaml
|
|
|
|
|
|
refactor: extract atomic_json_write helper, add 24 checkpoint tests
Extract the duplicated temp-file + fsync + os.replace pattern from
batch_runner.py (1 instance) and process_registry.py (2 instances) into
a shared utils.atomic_json_write() function.
Add 12 tests for atomic_json_write covering: valid JSON, parent dir
creation, overwrite, crash safety (original preserved on error), no temp
file leaks, string paths, unicode, custom indent, concurrent writes.
Add 12 tests for batch_runner checkpoint behavior covering:
_save_checkpoint (valid JSON, last_updated, overwrite, lock/no-lock,
parent dirs, no temp leaks), _load_checkpoint (missing file, existing
data, corrupt JSON), and resume logic (preserves prior progress,
different run_name starts fresh).
2026-03-06 05:50:12 -08:00
|
|
|
|
2026-03-14 02:56:13 -07:00
|
|
|
def atomic_json_write(
    path: Union[str, Path],
    data: Any,
    *,
    indent: int = 2,
    **dump_kwargs: Any,
) -> None:
    """Atomically serialize *data* as JSON to *path*.

    The payload is written to a sibling temp file in the same directory,
    flushed and fsynced, then moved over the target with os.replace().
    A crash mid-write therefore never leaves *path* truncated or
    half-written: the previous contents survive until the rename commits.

    Args:
        path: Destination file; parent directories are created as needed
            and an existing file is replaced.
        data: Any JSON-serializable object.
        indent: Indentation forwarded to json.dump() (default 2).
        **dump_kwargs: Extra keyword arguments forwarded to json.dump(),
            such as default=str for non-native types.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Temp file must live in the target directory so os.replace() is a
    # same-filesystem rename (atomic), never a cross-device copy.
    handle, scratch = tempfile.mkstemp(
        dir=str(target.parent),
        prefix=f".{target.stem}_",
        suffix=".tmp",
    )
    try:
        with os.fdopen(handle, "w", encoding="utf-8") as out:
            json.dump(data, out, indent=indent, ensure_ascii=False, **dump_kwargs)
            out.flush()
            # Force the bytes to disk before the rename makes them visible.
            os.fsync(out.fileno())
        os.replace(scratch, target)
    except BaseException:
        # BaseException on purpose: the scratch file must be cleaned up even
        # on KeyboardInterrupt/SystemExit before re-raising the original.
        try:
            os.unlink(scratch)
        except OSError:
            pass
        raise
|
2026-03-08 18:55:09 +03:30
|
|
|
|
|
|
|
|
|
|
|
|
|
def atomic_yaml_write(
    path: Union[str, Path],
    data: Any,
    *,
    default_flow_style: bool = False,
    sort_keys: bool = False,
    extra_content: Union[str, None] = None,
) -> None:
    """Write YAML data to a file atomically.

    Uses temp file + fsync + os.replace to ensure the target file is never
    left in a partially-written state. If the process crashes mid-write,
    the previous version of the file remains intact.

    Args:
        path: Target file path (will be created or overwritten).
        data: YAML-serializable data to write.
        default_flow_style: YAML flow style (default False).
        sort_keys: Whether to sort dict keys (default False).
        extra_content: Optional string to append after the YAML dump
            (e.g. commented-out sections for user reference).
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Temp file in the same directory keeps os.replace() an atomic
    # same-filesystem rename.
    fd, tmp_path = tempfile.mkstemp(
        dir=str(path.parent),
        prefix=f".{path.stem}_",
        suffix=".tmp",
    )
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=default_flow_style, sort_keys=sort_keys)
            if extra_content:
                f.write(extra_content)
            f.flush()
            # Persist bytes to disk before the rename publishes them.
            os.fsync(f.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # Match atomic_json_write: cleanup must also happen for process-level
        # interruptions (KeyboardInterrupt/SystemExit) before we re-raise.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise
|