fix(cron): use atomic write in save_job_output to prevent data loss on crash
save_job_output() used bare open('w') which truncates the output file
immediately. A crash or OOM kill between truncation and the completed
write would silently wipe the job output.
Write now goes to a temp file first, then os.replace() swaps it
atomically — matching the existing save_jobs() pattern in the same file.
Preserves _secure_file() permissions and uses safe cleanup on error.
Cherry-picked from PR #874 by alireza78a, rebased onto current main
with conflict resolution and fixes:
- Kept _secure_dir/_secure_file security calls from PR #757
- Used except BaseException (not bare except) to match save_jobs pattern
- Wrapped os.unlink in try/except OSError to avoid masking errors
Co-authored-by: alireza78a <alireza78a@users.noreply.github.com>
This commit is contained in:
17
cron/jobs.py
17
cron/jobs.py
@@ -431,8 +431,19 @@ def save_job_output(job_id: str, output: str):
|
||||
timestamp = _hermes_now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
output_file = job_output_dir / f"{timestamp}.md"
|
||||
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
_secure_file(output_file)
|
||||
fd, tmp_path = tempfile.mkstemp(dir=str(job_output_dir), suffix='.tmp', prefix='.output_')
|
||||
try:
|
||||
with os.fdopen(fd, 'w', encoding='utf-8') as f:
|
||||
f.write(output)
|
||||
f.flush()
|
||||
os.fsync(f.fileno())
|
||||
os.replace(tmp_path, output_file)
|
||||
_secure_file(output_file)
|
||||
except BaseException:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
return output_file
|
||||
|
||||
Reference in New Issue
Block a user