From f64a87209d8f40f002fd8589cd03cafe292b208f Mon Sep 17 00:00:00 2001
From: teknium1 <teknium1@gmail.com>
Date: Wed, 25 Feb 2026 04:22:03 -0800
Subject: [PATCH] refactor: enhance session content handling in AIAgent and
 update TTS output path

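- Introduced a new static method `_clean_session_content` in the `AIAgent` class that converts REASONING_SCRATCHPAD tags to <think> blocks for session logs, collapses runs of newlines directly before `<think>` and directly after `</think>` to a single newline, and strips leading/trailing whitespace.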
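- Updated the `_save_session_log` method to write cleaned copies of assistant messages (those with non-empty content) to the session log, ensuring consistency in session logs while leaving the in-memory message dicts unmodified; all other messages are written as-is.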
- Changed the default output directory for TTS audio files from `~/voice-memos` to `~/.hermes/audio_cache`, a more appropriate storage location, and updated the `output_path` description in `TTS_SCHEMA` to match.
---
 run_agent.py      | 25 +++++++++++++++++++++++--
 tools/tts_tool.py |  4 ++--
 2 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/run_agent.py b/run_agent.py
index 942788813..94f683e29 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -899,6 +899,18 @@ class AIAgent:
                 logging.warning(f"Failed to dump API request debug payload: {dump_error}")
             return None
 
+    @staticmethod
+    def _clean_session_content(content: str) -> str:
+        """Convert REASONING_SCRATCHPAD to think tags and clean up whitespace."""
+        if not content:
+            return content
+        content = convert_scratchpad_to_think(content)
+        # Strip extra newlines before/after think blocks
+        import re
+        content = re.sub(r'\n+(<think>)', r'\n\1', content)
+        content = re.sub(r'(</think>)\n+', r'\1\n', content)
+        return content.strip()
+
     def _save_session_log(self, messages: List[Dict[str, Any]] = None):
         """
         Save the full raw session to a JSON file.
@@ -908,6 +920,7 @@ class AIAgent:
         tool responses (with tool_call_id, tool_name), and injected system
         messages (compression summaries, todo snapshots, etc.).
 
+        REASONING_SCRATCHPAD tags are converted to <think> blocks for consistency.
         Overwritten after each turn so it always reflects the latest state.
         """
         messages = messages or self._session_messages
@@ -915,6 +928,14 @@ class AIAgent:
             return
 
         try:
+            # Clean assistant content for session logs
+            cleaned = []
+            for msg in messages:
+                if msg.get("role") == "assistant" and msg.get("content"):
+                    msg = dict(msg)
+                    msg["content"] = self._clean_session_content(msg["content"])
+                cleaned.append(msg)
+
             entry = {
                 "session_id": self.session_id,
                 "model": self.model,
@@ -922,8 +943,8 @@ class AIAgent:
                 "platform": self.platform,
                 "session_start": self.session_start.isoformat(),
                 "last_updated": datetime.now().isoformat(),
-                "message_count": len(messages),
-                "messages": messages,
+                "message_count": len(cleaned),
+                "messages": cleaned,
             }
 
             with open(self.session_log_file, "w", encoding="utf-8") as f:
diff --git a/tools/tts_tool.py b/tools/tts_tool.py
index 34f8dbcfc..3c02c58a7 100644
--- a/tools/tts_tool.py
+++ b/tools/tts_tool.py
@@ -65,7 +65,7 @@ DEFAULT_ELEVENLABS_VOICE_ID = "pNInz6obpgDQGcFmaJgB"  # Adam
 DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2"
 DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts"
 DEFAULT_OPENAI_VOICE = "alloy"
-DEFAULT_OUTPUT_DIR = os.path.expanduser("~/voice-memos")
+DEFAULT_OUTPUT_DIR = os.path.expanduser("~/.hermes/audio_cache")
 MAX_TEXT_LENGTH = 4000
 
 
@@ -435,7 +435,7 @@ TTS_SCHEMA = {
             },
             "output_path": {
                 "type": "string",
-                "description": "Optional custom file path to save the audio. Defaults to ~/voice-memos/<timestamp>.mp3"
+                "description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
             }
         },
         "required": ["text"]