From f90a627f9afd1f88b8f7daaa5da48e0060d261e3 Mon Sep 17 00:00:00 2001 From: Vicaversa Date: Wed, 4 Mar 2026 11:32:57 +0300 Subject: [PATCH] fix(gateway): add missing UTF-8 encoding to file I/O preventing crashes on Windows On Windows, Python's open() defaults to the system locale encoding (e.g. cp1254 for Turkish, cp1252 for Western European) instead of UTF-8. The gateway already uses ensure_ascii=False in json.dumps() to preserve Unicode characters in chat messages, but the corresponding open() calls lack encoding="utf-8". This mismatch causes UnicodeEncodeError / UnicodeDecodeError when users send non-ASCII messages (Turkish, Japanese, Arabic, emoji, etc.) through Telegram, Discord, WhatsApp, or Slack on Windows. The project already fixed this for .env files in hermes_cli/config.py (line 624) but the gateway module was missed. Files fixed: - gateway/session.py: session index + JSONL transcript read/write (5 calls) - gateway/channel_directory.py: channel directory read/write (3 calls) - gateway/mirror.py: session index read + transcript append (2 calls) --- gateway/channel_directory.py | 6 +++--- gateway/mirror.py | 4 ++-- gateway/session.py | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py index 622fed6bd..820743150 100644 --- a/gateway/channel_directory.py +++ b/gateway/channel_directory.py @@ -52,7 +52,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]: try: DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True) - with open(DIRECTORY_PATH, "w") as f: + with open(DIRECTORY_PATH, "w", encoding="utf-8") as f: json.dump(directory, f, indent=2, ensure_ascii=False) except Exception as e: logger.warning("Channel directory: failed to write: %s", e) @@ -115,7 +115,7 @@ def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]: entries = [] try: - with open(sessions_path) as f: + with open(sessions_path, encoding="utf-8") as f: data = json.load(f) seen_ids = set() @@ -147,7 +147,7 @@ def load_directory() -> Dict[str, Any]: if not DIRECTORY_PATH.exists(): return {"updated_at": None, "platforms": {}} try: - with open(DIRECTORY_PATH) as f: + with open(DIRECTORY_PATH, encoding="utf-8") as f: return json.load(f) except Exception: return {"updated_at": None, "platforms": {}} diff --git a/gateway/mirror.py b/gateway/mirror.py index 8c2f39983..527fc2c13 100644 --- a/gateway/mirror.py +++ b/gateway/mirror.py @@ -73,7 +73,7 @@ def _find_session_id(platform: str, chat_id: str) -> Optional[str]: return None try: - with open(_SESSIONS_INDEX) as f: + with open(_SESSIONS_INDEX, encoding="utf-8") as f: data = json.load(f) except Exception: return None @@ -103,7 +103,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None: """Append a message to the JSONL transcript file.""" transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl" try: - with open(transcript_path, "a") as f: + with open(transcript_path, "a", encoding="utf-8") as f: f.write(json.dumps(message, ensure_ascii=False) + "\n") except Exception as e: logger.debug("Mirror JSONL write failed: %s", e) diff --git a/gateway/session.py b/gateway/session.py index b59196b81..e0b8db083 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -317,7 +317,7 @@ class SessionStore: if sessions_file.exists(): try: - with open(sessions_file, "r") as f: + with open(sessions_file, "r", encoding="utf-8") as f: data = json.load(f) for key, entry_data in data.items(): self._entries[key] = SessionEntry.from_dict(entry_data) @@ -332,7 +332,7 @@ class SessionStore: sessions_file = self.sessions_dir / "sessions.json" data = {key: entry.to_dict() for key, entry in self._entries.items()} - with open(sessions_file, "w") as f: + with open(sessions_file, "w", encoding="utf-8") as f: json.dump(data, f, indent=2) def _generate_session_key(self, source: SessionSource) -> str: @@ -571,7 +571,7 @@ class SessionStore: # Also write legacy JSONL (keeps existing tooling working during transition) transcript_path = self.get_transcript_path(session_id) - with open(transcript_path, "a") as f: + with open(transcript_path, "a", encoding="utf-8") as f: f.write(json.dumps(message, ensure_ascii=False) + "\n") def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None: @@ -598,7 +598,7 @@ class SessionStore: # JSONL: overwrite the file transcript_path = self.get_transcript_path(session_id) - with open(transcript_path, "w") as f: + with open(transcript_path, "w", encoding="utf-8") as f: for msg in messages: f.write(json.dumps(msg, ensure_ascii=False) + "\n") @@ -620,7 +620,7 @@ class SessionStore: return [] messages = [] - with open(transcript_path, "r") as f: + with open(transcript_path, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: