From 2bf96ad244611ea067e2f24cd22a09e620a0fa09 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Mon, 23 Feb 2026 23:55:42 -0800 Subject: [PATCH] feat: add ephemeral prefill messages and system prompt loading - Implemented functionality to load ephemeral prefill messages from a JSON file, enhancing few-shot priming capabilities for the agent. - Introduced a mechanism to load an ephemeral system prompt from environment variables or configuration files, ensuring dynamic prompt adjustments at API-call time. - Updated the CLI and agent initialization to utilize the new prefill messages and system prompt, improving the overall interaction experience. - Enhanced configuration options with new environment variables for prefill messages and system prompts, allowing for greater customization without persistence. --- cli.py | 49 +++++++++++++++++++++++--- gateway/run.py | 75 ++++++++++++++++++++++++++++++++++++++-- hermes_cli/config.py | 19 ++++++++++ run_agent.py | 66 +++++++++++++++++++++++++---------- tools/file_operations.py | 27 +++++++++++++-- tools/file_tools.py | 2 +- tools/web_tools.py | 16 ++++----- 7 files changed, 218 insertions(+), 36 deletions(-) diff --git a/cli.py b/cli.py index ce0b00ad4..42394efa1 100755 --- a/cli.py +++ b/cli.py @@ -61,6 +61,35 @@ if env_path.exists(): # Configuration Loading # ============================================================================= +def _load_prefill_messages(file_path: str) -> List[Dict[str, Any]]: + """Load ephemeral prefill messages from a JSON file. + + The file should contain a JSON array of {role, content} dicts, e.g.: + [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}] + + Relative paths are resolved from ~/.hermes/. + Returns an empty list if the path is empty or the file doesn't exist. + """ + if not file_path: + return [] + path = Path(file_path).expanduser() + if not path.is_absolute(): + path = Path.home() / ".hermes" / path + if not path.exists(): + logger.warning("Prefill messages file not found: %s", path) + return [] + try: + with open(path, "r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, list): + logger.warning("Prefill messages file must contain a JSON array: %s", path) + return [] + return data + except Exception as e: + logger.warning("Failed to load prefill messages from %s: %s", path, e) + return [] + + def load_cli_config() -> Dict[str, Any]: """ Load CLI configuration from config files. @@ -116,6 +145,7 @@ def load_cli_config() -> Dict[str, Any]: "max_turns": 60, # Default max tool-calling iterations "verbose": False, "system_prompt": "", + "prefill_messages_file": "", "personalities": { "helpful": "You are a helpful, friendly AI assistant.", "concise": "You are a concise assistant. Keep responses brief and to the point.", @@ -753,10 +783,18 @@ class HermesCLI: if invalid: self.console.print(f"[bold red]Warning: Unknown toolsets: {', '.join(invalid)}[/]") - # System prompt and personalities from config - self.system_prompt = CLI_CONFIG["agent"].get("system_prompt", "") + # Ephemeral system prompt: env var takes precedence, then config + self.system_prompt = ( + os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "") + or CLI_CONFIG["agent"].get("system_prompt", "") + ) self.personalities = CLI_CONFIG["agent"].get("personalities", {}) + # Ephemeral prefill messages (few-shot priming, never persisted) + self.prefill_messages = _load_prefill_messages( + CLI_CONFIG["agent"].get("prefill_messages_file", "") + ) + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -848,10 +886,11 @@ class HermesCLI: max_iterations=self.max_turns, enabled_toolsets=self.enabled_toolsets, verbose_logging=self.verbose, - quiet_mode=True, # Suppress verbose output for clean CLI + quiet_mode=True, ephemeral_system_prompt=self.system_prompt if self.system_prompt else None, - session_id=self.session_id, # Pass CLI's session ID to agent - platform="cli", # CLI interface — agent uses terminal-friendly formatting + prefill_messages=self.prefill_messages or None, + session_id=self.session_id, + platform="cli", session_db=self._session_db, clarify_callback=self._clarify_callback, ) diff --git a/gateway/run.py b/gateway/run.py index b7be4478e..9c07d28e5 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -92,6 +92,11 @@ class GatewayRunner: self.config = config or load_gateway_config() self.adapters: Dict[Platform, BasePlatformAdapter] = {} + # Load ephemeral config from config.yaml / env vars. + # Both are injected at API-call time only and never persisted. + self._prefill_messages = self._load_prefill_messages() + self._ephemeral_system_prompt = self._load_ephemeral_system_prompt() + # Wire process registry into session store for reset protection from tools.process_registry import process_registry self.session_store = SessionStore( @@ -119,6 +124,66 @@ class GatewayRunner: from gateway.hooks import HookRegistry self.hooks = HookRegistry() + @staticmethod + def _load_prefill_messages() -> List[Dict[str, Any]]: + """Load ephemeral prefill messages from config or env var. + + Checks HERMES_PREFILL_MESSAGES_FILE env var first, then falls back to + the prefill_messages_file key in ~/.hermes/config.yaml. + Relative paths are resolved from ~/.hermes/. + """ + import json as _json + file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") + if not file_path: + try: + import yaml as _y + cfg_path = Path.home() / ".hermes" / "config.yaml" + if cfg_path.exists(): + with open(cfg_path) as _f: + cfg = _y.safe_load(_f) or {} + file_path = cfg.get("prefill_messages_file", "") + except Exception: + pass + if not file_path: + return [] + path = Path(file_path).expanduser() + if not path.is_absolute(): + path = Path.home() / ".hermes" / path + if not path.exists(): + logger.warning("Prefill messages file not found: %s", path) + return [] + try: + with open(path, "r", encoding="utf-8") as f: + data = _json.load(f) + if not isinstance(data, list): + logger.warning("Prefill messages file must contain a JSON array: %s", path) + return [] + return data + except Exception as e: + logger.warning("Failed to load prefill messages from %s: %s", path, e) + return [] + + @staticmethod + def _load_ephemeral_system_prompt() -> str: + """Load ephemeral system prompt from config or env var. + + Checks HERMES_EPHEMERAL_SYSTEM_PROMPT env var first, then falls back to + agent.system_prompt in ~/.hermes/config.yaml. + """ + prompt = os.getenv("HERMES_EPHEMERAL_SYSTEM_PROMPT", "") + if prompt: + return prompt + try: + import yaml as _y + cfg_path = Path.home() / ".hermes" / "config.yaml" + if cfg_path.exists(): + with open(cfg_path) as _f: + cfg = _y.safe_load(_f) or {} + return (cfg.get("agent", {}).get("system_prompt", "") or "").strip() + except Exception: + pass + return "" + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -1275,15 +1340,21 @@ class GatewayRunner: # Platform.LOCAL ("local") maps to "cli"; others pass through as-is. platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value + # Combine platform context with user-configured ephemeral system prompt + combined_ephemeral = context_prompt or "" + if self._ephemeral_system_prompt: + combined_ephemeral = (combined_ephemeral + "\n\n" + self._ephemeral_system_prompt).strip() + agent = AIAgent( model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"), max_iterations=max_iterations, quiet_mode=True, enabled_toolsets=enabled_toolsets, - ephemeral_system_prompt=context_prompt, + ephemeral_system_prompt=combined_ephemeral or None, + prefill_messages=self._prefill_messages or None, session_id=session_id, tool_progress_callback=progress_callback if tool_progress_enabled else None, - platform=platform_key, # Tells the agent which interface to format for + platform=platform_key, ) # Store agent reference for interrupt support diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ba45ad9f1..0b2868fae 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -122,6 +122,11 @@ DEFAULT_CONFIG = { "user_char_limit": 1375, # ~500 tokens at 2.75 chars/token }, + # Ephemeral prefill messages file — JSON list of {role, content} dicts + # injected at the start of every API call for few-shot priming. + # Never saved to sessions, logs, or trajectories. + "prefill_messages_file": "", + # Permanently allowed dangerous command patterns (added via "always" approval) "command_allowlist": [], @@ -312,6 +317,20 @@ OPTIONAL_ENV_VARS = { "password": False, "category": "setting", }, + "HERMES_PREFILL_MESSAGES_FILE": { + "description": "Path to JSON file with ephemeral prefill messages for few-shot priming", + "prompt": "Prefill messages file path", + "url": None, + "password": False, + "category": "setting", + }, + "HERMES_EPHEMERAL_SYSTEM_PROMPT": { + "description": "Ephemeral system prompt injected at API-call time (never persisted to sessions)", + "prompt": "Ephemeral system prompt", + "url": None, + "password": False, + "category": "setting", + }, } diff --git a/run_agent.py b/run_agent.py index 3b557451d..3b31b9947 100644 --- a/run_agent.py +++ b/run_agent.py @@ -677,12 +677,10 @@ class AIAgent: "value": user_query }) - # Calculate where agent responses start in the messages list. - # Prefill messages are ephemeral (only used to prime model response style) - # so we skip them entirely in the saved trajectory. - # Layout: [*prefill_msgs, actual_user_msg, ...agent_responses...] - num_prefill = len(self.prefill_messages) if self.prefill_messages else 0 - i = num_prefill + 1 # Skip prefill messages + the actual user message (already added above) + # Skip the first message (the user query) since we already added it above. + # Prefill messages are injected at API-call time only (not in the messages + # list), so no offset adjustment is needed here. + i = 1 while i < len(messages): msg = messages[i] @@ -1043,9 +1041,10 @@ class AIAgent: if tool_guidance: prompt_parts.append(" ".join(tool_guidance)) - caller_prompt = system_message if system_message is not None else self.ephemeral_system_prompt - if caller_prompt: - prompt_parts.append(caller_prompt) + # Note: ephemeral_system_prompt is NOT included here. It's injected at + # API-call time only so it stays out of the cached/stored system prompt. + if system_message is not None: + prompt_parts.append(system_message) if self._memory_store: if self._memory_enabled: @@ -1510,6 +1509,19 @@ class AIAgent: logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s") logging.debug(f"Tool result preview: {result_preview}...") + # Guard against tools returning absurdly large content that would + # blow up the context window. 100K chars ≈ 25K tokens — generous + # enough for any reasonable tool output but prevents catastrophic + # context explosions (e.g. accidental base64 image dumps). + MAX_TOOL_RESULT_CHARS = 100_000 + if len(function_result) > MAX_TOOL_RESULT_CHARS: + original_len = len(function_result) + function_result = ( + function_result[:MAX_TOOL_RESULT_CHARS] + + f"\n\n[Truncated: tool response was {original_len:,} chars, " + f"exceeding the {MAX_TOOL_RESULT_CHARS:,} char limit]" + ) + tool_msg = { "role": "tool", "content": function_result, @@ -1551,8 +1563,15 @@ class AIAgent: try: api_messages = messages.copy() + effective_system = self._cached_system_prompt or "" if self.ephemeral_system_prompt: - api_messages = [{"role": "system", "content": self.ephemeral_system_prompt}] + api_messages + effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + if effective_system: + api_messages = [{"role": "system", "content": effective_system}] + api_messages + if self.prefill_messages: + sys_offset = 1 if effective_system else 0 + for idx, pfm in enumerate(self.prefill_messages): + api_messages.insert(sys_offset + idx, pfm.copy()) summary_extra_body = {} if "openrouter" in self.base_url.lower(): @@ -1628,11 +1647,10 @@ class AIAgent: if conversation_history and not self._todo_store.has_items(): self._hydrate_todo_store(conversation_history) - # Inject prefill messages at the start of conversation (before user's actual prompt) - # This is used for few-shot priming, e.g., a greeting exchange to set response style - if self.prefill_messages and not conversation_history: - for prefill_msg in self.prefill_messages: - messages.append(prefill_msg.copy()) + # Prefill messages (few-shot priming) are injected at API-call time only, + # never stored in the messages list. This keeps them ephemeral: they won't + # be saved to session DB, session logs, or batch trajectories, but they're + # automatically re-applied on every API call (including session continuations). # Track user turns for memory flush and periodic nudge logic self._user_turn_count += 1 @@ -1733,9 +1751,21 @@ class AIAgent: # The signature field helps maintain reasoning continuity api_messages.append(api_msg) - if active_system_prompt: - # Insert system message at the beginning - api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages + # Build the final system message: cached prompt + ephemeral system prompt. + # The ephemeral part is appended here (not baked into the cached prompt) + # so it stays out of the session DB and logs. + effective_system = active_system_prompt or "" + if self.ephemeral_system_prompt: + effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + if effective_system: + api_messages = [{"role": "system", "content": effective_system}] + api_messages + + # Inject ephemeral prefill messages right after the system prompt + # but before conversation history. Same API-call-time-only pattern. + if self.prefill_messages: + sys_offset = 1 if effective_system else 0 + for idx, pfm in enumerate(self.prefill_messages): + api_messages.insert(sys_offset + idx, pfm.copy()) # Apply Anthropic prompt caching for Claude models via OpenRouter. # Auto-detected: if model name contains "claude" and base_url is OpenRouter, diff --git a/tools/file_operations.py b/tools/file_operations.py index 64ce6d86a..ae7dad79c 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -412,9 +412,17 @@ class ShellFileOperations(FileOperations): # Still try to read, but warn pass - # Check if it's an image - return base64 + # Images are never inlined — redirect to the vision tool if self._is_image(path): - return self._read_image(path) + return ReadResult( + is_image=True, + is_binary=True, + file_size=file_size, + hint=( + "Image file detected. Automatically redirected to vision_analyze tool. " + "Use vision_analyze with this file path to inspect the image contents." + ), + ) # Read a sample to check for binary content sample_cmd = f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null" @@ -457,6 +465,10 @@ class ShellFileOperations(FileOperations): hint=hint ) + # Images larger than this are too expensive to inline as base64 in the + # conversation context. Return metadata only and suggest vision_analyze. + MAX_IMAGE_BYTES = 512 * 1024 # 512 KB + def _read_image(self, path: str) -> ReadResult: """Read an image file, returning base64 content.""" # Get file size @@ -467,6 +479,17 @@ class ShellFileOperations(FileOperations): except ValueError: file_size = 0 + if file_size > self.MAX_IMAGE_BYTES: + return ReadResult( + is_image=True, + is_binary=True, + file_size=file_size, + hint=( + f"Image is too large to inline ({file_size:,} bytes). " + "Use vision_analyze to inspect the image, or reference it by path." + ), + ) + # Get base64 content b64_cmd = f"base64 -w 0 {self._escape_shell_arg(path)} 2>/dev/null" b64_result = self._exec(b64_cmd, timeout=30) diff --git a/tools/file_tools.py b/tools/file_tools.py index c9708b2eb..91d69c411 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -199,7 +199,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Images (png/jpg/gif/webp) returned as base64. Use offset and limit for large files.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { diff --git a/tools/web_tools.py b/tools/web_tools.py index ed98b05c2..43a7da55a 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -159,7 +159,7 @@ async def process_content_with_llm( return processed_content except Exception as e: - logger.error("Error processing content with LLM: %s", e) + logger.debug("Error processing content with LLM: %s", e) return f"[Failed to process content: {str(e)[:100]}. Content size: {len(content):,} chars]" @@ -318,7 +318,7 @@ async def _process_large_content_chunked( summaries.append(f"## Section {chunk_idx + 1}\n{summary}") if not summaries: - logger.error("All chunk summarizations failed") + logger.debug("All chunk summarizations failed") return "[Failed to process large content: all chunk summarizations failed]" logger.info("Got %d/%d chunk summaries", len(summaries), len(chunks)) @@ -532,7 +532,7 @@ def web_search_tool(query: str, limit: int = 5) -> str: except Exception as e: error_msg = f"Error searching web: {str(e)}" - logger.error("%s", error_msg) + logger.debug("%s", error_msg) debug_call_data["error"] = error_msg _debug.log_call("web_search_tool", debug_call_data) @@ -673,7 +673,7 @@ async def web_extract_tool( }) except Exception as scrape_err: - logger.error("Error scraping %s: %s", url, scrape_err) + logger.debug("Scrape failed for %s: %s", url, scrape_err) results.append({ "url": url, "title": "", @@ -799,7 +799,7 @@ async def web_extract_tool( except Exception as e: error_msg = f"Error extracting content: {str(e)}" - logger.error("%s", error_msg) + logger.debug("%s", error_msg) debug_call_data["error"] = error_msg _debug.log_call("web_extract_tool", debug_call_data) @@ -892,7 +892,7 @@ async def web_crawl_tool( **crawl_params ) except Exception as e: - logger.error("Crawl API call failed: %s", e) + logger.debug("Crawl API call failed: %s", e) raise pages: List[Dict[str, Any]] = [] @@ -1092,7 +1092,7 @@ async def web_crawl_tool( except Exception as e: error_msg = f"Error crawling website: {str(e)}" - logger.error("%s", error_msg) + logger.debug("%s", error_msg) debug_call_data["error"] = error_msg _debug.log_call("web_crawl_tool", debug_call_data) @@ -1227,7 +1227,7 @@ WEB_SEARCH_SCHEMA = { WEB_EXTRACT_SCHEMA = { "name": "web_extract", - "description": "Extract content from web page URLs. Pages under 5000 chars return raw content; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. Use browser tools only when pages require interaction or dynamic content.", + "description": "Extract content from web page URLs. Returns page content in markdown format. Pages under 5000 chars return full markdown; larger pages are LLM-summarized and capped at ~5000 chars per page. Pages over 2M chars are refused. If a URL fails or times out, use the browser tool to access it instead.", "parameters": { "type": "object", "properties": {