From 9a7ed81b4bb53ea2556d77b532d6f7e262bdd5c4 Mon Sep 17 00:00:00 2001
From: alireza78a
Date: Fri, 13 Mar 2026 08:04:36 -0700
Subject: [PATCH] fix(cron): use atomic write in save_job_output to prevent data loss on crash
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

save_job_output() used bare open('w') which truncates the output file
immediately. A crash or OOM kill between truncation and the completed
write would silently wipe the job output.

Write now goes to a temp file first, then os.replace() swaps it
atomically — matching the existing save_jobs() pattern in the same file.
Preserves _secure_file() permissions and uses safe cleanup on error.

Cherry-picked from PR #874 by alireza78a, rebased onto current main
with conflict resolution and fixes:
- Kept _secure_dir/_secure_file security calls from PR #757
- Used except BaseException (not bare except) to match save_jobs pattern
- Wrapped os.unlink in try/except OSError to avoid masking errors

Co-authored-by: alireza78a
---
 cron/jobs.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/cron/jobs.py b/cron/jobs.py
index 6cbb168f0..186424c65 100644
--- a/cron/jobs.py
+++ b/cron/jobs.py
@@ -431,8 +431,19 @@ def save_job_output(job_id: str, output: str):
     timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
     output_file = job_output_dir / f"{timestamp}.md"
 
-    with open(output_file, 'w', encoding='utf-8') as f:
-        f.write(output)
-    _secure_file(output_file)
+    fd, tmp_path = tempfile.mkstemp(dir=str(job_output_dir), suffix='.tmp', prefix='.output_')
+    try:
+        with os.fdopen(fd, 'w', encoding='utf-8') as f:
+            f.write(output)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, output_file)
+        _secure_file(output_file)
+    except BaseException:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
 
     return output_file