From 2c88aa96ee3d3e536ce67bccd45d354e2ea6fe2d Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Sun, 12 Apr 2026 22:29:54 -0400
Subject: [PATCH] fix(cron): Thread-safe save_jobs with file lock

Prevents race condition when parallel tick workers call mark_job_run()
concurrently. Uses fcntl.flock() to serialize jobs.json writes.

Without this fix, 6 parallel workers could read-modify-write jobs.json
simultaneously, causing lost status updates.
---
Review notes (not part of the commit message):
- The lock is acquired inside save_jobs(), after the caller has already
  built `jobs`, so it serializes only the write step. Please confirm that
  mark_job_run() cannot still lose an update when two workers load the
  same snapshot before either one saves.
- fcntl is only available on Unix; with this change save_jobs() raises
  ImportError elsewhere. Please confirm that is acceptable for this module.

 cron/jobs.py | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/cron/jobs.py b/cron/jobs.py
index 4d5d84ec9..3af8278b4 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -347,22 +347,26 @@ def load_jobs() -> List[Dict[str, Any]]:
 
 
 def save_jobs(jobs: List[Dict[str, Any]]):
-    """Save all jobs to storage."""
+    """Save all jobs to storage. Thread-safe via file lock."""
     ensure_dirs()
-    fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
-    try:
-        with os.fdopen(fd, 'w', encoding='utf-8') as f:
-            json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
-            f.flush()
-            os.fsync(f.fileno())
-        os.replace(tmp_path, JOBS_FILE)
-        _secure_file(JOBS_FILE)
-    except BaseException:
+    import fcntl
+    lock_path = JOBS_FILE.parent / ".jobs.lock"
+    with open(lock_path, "w") as lock_fd:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX)
+        fd, tmp_path = tempfile.mkstemp(dir=str(JOBS_FILE.parent), suffix='.tmp', prefix='.jobs_')
         try:
-            os.unlink(tmp_path)
-        except OSError:
-            pass
-        raise
+            with os.fdopen(fd, 'w', encoding='utf-8') as f:
+                json.dump({"jobs": jobs, "updated_at": _hermes_now().isoformat()}, f, indent=2)
+                f.flush()
+                os.fsync(f.fileno())
+            os.replace(tmp_path, JOBS_FILE)
+            _secure_file(JOBS_FILE)
+        except BaseException:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+            raise
 
 
 def create_job(