fix(session): skip corrupt lines in load_transcript instead of crashing (#1744)

Wrap json.loads() in load_transcript() with try/except JSONDecodeError so that partial JSONL lines (from mid-write crashes like OOM/SIGKILL) are skipped with a warning instead of crashing the entire transcript load. The rest of the history loads fine. Adds a logger.warning with the session ID and truncated corrupt line content for debugging visibility. Salvaged from PR #1193 by alireza78a. Closes #1193
2026-03-17 05:18:12 -07:00
parent aea39eeafb
commit 702191049f
2 changed files with 57 additions and 1 deletions
--- a/gateway/session.py
+++ b/gateway/session.py
@@ -944,7 +944,13 @@ class SessionStore:
            for line in f:
                line = line.strip()
                if line:
-                    messages.append(json.loads(line))
+                    try:
+                        messages.append(json.loads(line))
+                    except json.JSONDecodeError:
+                        logger.warning(
+                            "Skipping corrupt line in transcript %s: %s",
+                            session_id, line[:120],
+                        )
        
        return messages

--- a/tests/gateway/test_session.py
+++ b/tests/gateway/test_session.py
@@ -336,6 +336,56 @@ class TestSessionStoreRewriteTranscript:
        assert reloaded == []


+class TestLoadTranscriptCorruptLines:
+    """Regression: corrupt JSONL lines (e.g. from mid-write crash) must be
+    skipped instead of crashing the entire transcript load.  GH-1193."""
+
+    @pytest.fixture()
+    def store(self, tmp_path):
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            s = SessionStore(sessions_dir=tmp_path, config=config)
+        s._db = None
+        s._loaded = True
+        return s
+
+    def test_corrupt_line_skipped(self, store, tmp_path):
+        session_id = "corrupt_test"
+        transcript_path = store.get_transcript_path(session_id)
+        transcript_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(transcript_path, "w") as f:
+            f.write('{"role": "user", "content": "hello"}\n')
+            f.write('{"role": "assistant", "content": "hi th')  # truncated
+            f.write("\n")
+            f.write('{"role": "user", "content": "goodbye"}\n')
+
+        messages = store.load_transcript(session_id)
+        assert len(messages) == 2
+        assert messages[0]["content"] == "hello"
+        assert messages[1]["content"] == "goodbye"
+
+    def test_all_lines_corrupt_returns_empty(self, store, tmp_path):
+        session_id = "all_corrupt"
+        transcript_path = store.get_transcript_path(session_id)
+        transcript_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(transcript_path, "w") as f:
+            f.write("not json at all\n")
+            f.write("{truncated\n")
+
+        messages = store.load_transcript(session_id)
+        assert messages == []
+
+    def test_valid_transcript_unaffected(self, store, tmp_path):
+        session_id = "valid_test"
+        store.append_to_transcript(session_id, {"role": "user", "content": "a"})
+        store.append_to_transcript(session_id, {"role": "assistant", "content": "b"})
+
+        messages = store.load_transcript(session_id)
+        assert len(messages) == 2
+        assert messages[0]["content"] == "a"
+        assert messages[1]["content"] == "b"
+
+
 class TestWhatsAppDMSessionKeyConsistency:
    """Regression: all session-key construction must go through build_session_key
    so DMs are isolated by chat_id across platforms."""