Compare commits


3 Commits

Author SHA1 Message Date
628487f7bd fix(cron): rewrite cloud-incompatible prompt instructions (#378)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m9s
Health Monitor prompts say 'Check Ollama is responding' but run
on cloud models that cannot reach localhost. Instead of just
warning the agent, rewrite the instructions to cloud-compatible
equivalents the agent can actually execute.

Changes:
- Add import re
- Add _CLOUD_INCOMPATIBLE_PATTERNS: regex pairs (pattern, replacement)
- Add _rewrite_cloud_incompatible_prompt(): rewrites localhost/Ollama
  references to 'use available tools to check service health'
- Wire into run_job() after resolve_turn_route()

Closes #378
2026-04-14 01:47:00 +00:00
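
For illustration only (not part of the commit): given the patterns added in the first diff below, a matching prompt is rewritten roughly as follows, assuming is_local_endpoint() classifies the base URL as remote.

prompt = "Check Ollama is responding, then curl localhost:11434"
result = _rewrite_cloud_incompatible_prompt(prompt, "https://openrouter.ai/api/v1")
# result begins with the "[NOTE: ...]" banner, followed by:
# "Verify system services are healthy using available tools, then use available tools to check service health"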
954fd992eb Merge pull request 'perf: lazy session creation — defer DB write until first message (#314)' (#449) from whip/314-1776127532 into main
Some checks failed
Forge CI / smoke-and-build (push) Failing after 55s
Forge CI / smoke-and-build (pull_request) Failing after 1m12s
perf: lazy session creation (#314)

Closes #314.
2026-04-14 01:08:13 +00:00
Metatron
f35f56e397 perf: lazy session creation — defer DB write until first message (closes #314)
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 56s
Remove eager create_session() call from AIAgent.__init__(). Sessions
are now created lazily on first _flush_messages_to_session_db() call
via ensure_session() which uses INSERT OR IGNORE.

Impact: eliminates 32.4% of sessions (3,564 of 10,985) that were
created at agent init but never received any messages.

The existing ensure_session() fallback in _flush_messages_to_session_db()
already handles this pattern — it was originally designed for recovery
after transient SQLite lock failures. Now it's the primary creation path.

Compression-initiated sessions still use create_session() directly
(line ~5995) since they have messages to write immediately.
2026-04-13 20:52:06 -04:00
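
Arithmetic check on the figure above: 3,564 / 10,985 ≈ 32.4%. A minimal sketch of the lazy-creation flow described in this message (simplified, hypothetical names; write_messages() stands in for the real flush helper):

class Agent:
    def __init__(self, session_db, session_id):
        # No DB write at init anymore; just keep the handles.
        self._session_db = session_db
        self.session_id = session_id

    def _flush_messages(self, messages):
        # The session row is created on the first flush; ensure_session()
        # is a no-op afterwards because it uses INSERT OR IGNORE.
        self._session_db.ensure_session(self.session_id)
        self._session_db.write_messages(self.session_id, messages)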
4 changed files with 59 additions and 95 deletions

View File

@@ -13,6 +13,7 @@ import concurrent.futures
import json
import logging
import os
import re
import subprocess
import sys
@@ -643,7 +644,56 @@ def _build_job_prompt(job: dict) -> str:
return "\n".join(parts)
# Regex patterns for local service references that fail on cloud endpoints
_CLOUD_INCOMPATIBLE_PATTERNS = [
(re.compile(r"\b[Cc]heck\s+(?:that\s+)?[Oo]llama\s+(?:is\s+)?(?:responding|running|up|available)", re.IGNORECASE),
"Verify system services are healthy using available tools"),
(re.compile(r"\b[Vv]erify\s+(?:that\s+)?[Oo]llama\s+(?:is\s+)?(?:responding|running|up)", re.IGNORECASE),
"Verify system services are healthy using available tools"),
(re.compile(r"\bcurl\s+localhost:\d+", re.IGNORECASE),
"use available tools to check service health"),
(re.compile(r"\bcurl\s+127\.0\.0\.1:\d+", re.IGNORECASE),
"use available tools to check service health"),
(re.compile(r"\bpoll\s+localhost", re.IGNORECASE),
"check service health via available tools"),
]
def _rewrite_cloud_incompatible_prompt(prompt: str, base_url: str) -> str:
"""Rewrite prompt instructions that assume local service access when running on cloud.
When a cron job runs on a cloud inference endpoint (Nous, OpenRouter, Anthropic),
instructions to "Check Ollama" or "curl localhost:11434" are impossible.
Instead of just warning, this rewrites the instruction to a cloud-compatible
equivalent that the agent can actually execute.
Returns the (possibly rewritten) prompt.
"""
try:
from agent.model_metadata import is_local_endpoint
except ImportError:
return prompt
if is_local_endpoint(base_url or ""):
return prompt # Local — no rewrite needed
rewritten = prompt
for pattern, replacement in _CLOUD_INCOMPATIBLE_PATTERNS:
rewritten = pattern.sub(replacement, rewritten)
if rewritten != prompt:
rewritten = (
"[NOTE: Some instructions were adjusted for cloud execution. "
"Local service checks were rewritten to use available tools.]
"
+ rewritten
)
return rewritten
def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
"""
Execute a single cron job.
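
The hunk is truncated before the body of run_job(); per the commit message, the rewrite is wired in after resolve_turn_route(). A sketch of what that wiring might look like, assuming resolve_turn_route() exposes the resolved base URL (the route and base_url names are assumptions, not shown in this diff):

# Sketch only; the actual call site inside run_job() is not visible in this hunk.
route = resolve_turn_route(job)
prompt = _build_job_prompt(job)
prompt = _rewrite_cloud_incompatible_prompt(prompt, route.base_url)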

View File

@@ -1001,30 +1001,10 @@ class AIAgent:
self._session_db = session_db
self._parent_session_id = parent_session_id
self._last_flushed_db_idx = 0 # tracks DB-write cursor to prevent duplicate writes
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or os.environ.get("HERMES_SESSION_SOURCE", "cli"),
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
parent_session_id=self._parent_session_id,
)
except Exception as e:
# Transient SQLite lock contention (e.g. CLI and gateway writing
# concurrently) must NOT permanently disable session_search for
# this agent. Keep _session_db alive — subsequent message
# flushes and session_search calls will still work once the
# lock clears. The session row may be missing from the index
# for this run, but that is recoverable (flushes upsert rows).
logger.warning(
"Session DB create_session failed (session_search still available): %s", e
)
# Lazy session creation: defer until first message flush (#314).
# _flush_messages_to_session_db() calls ensure_session() which uses
# INSERT OR IGNORE — creating the row only when messages arrive.
# This eliminates 32% of sessions that are created but never used.
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore
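
For reference, the idempotent-create pattern that ensure_session() relies on looks roughly like this (hypothetical schema and signature; the real function lives in the session DB layer and is not part of this diff):

def ensure_session(conn, session_id, source, model):
    # INSERT OR IGNORE makes this safe to call on every flush:
    # the row is inserted once, and later calls are no-ops.
    conn.execute(
        "INSERT OR IGNORE INTO sessions (session_id, source, model) VALUES (?, ?, ?)",
        (session_id, source, model),
    )
    conn.commit()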

View File

@@ -1,52 +0,0 @@
"""Tests for TTS speed support (#321)."""
import json
import pytest
from unittest.mock import MagicMock, patch, AsyncMock
class TestTTSSchemaHasSpeed:
def test_schema_includes_speed(self):
from tools.tts_tool import TTS_SCHEMA
assert "speed" in TTS_SCHEMA["parameters"]["properties"]
assert TTS_SCHEMA["parameters"]["properties"]["speed"]["type"] == "number"
def test_speed_not_required(self):
from tools.tts_tool import TTS_SCHEMA
assert "speed" not in TTS_SCHEMA["parameters"].get("required", [])
class TestTextToSpeechToolSignature:
def test_accepts_speed(self):
from tools.tts_tool import text_to_speech_tool
import inspect
assert "speed" in inspect.signature(text_to_speech_tool).parameters
class TestSpeedClamping:
@patch("tools.tts_tool._load_tts_config", return_value={})
@patch("tools.tts_tool._get_provider", return_value="edge")
@patch("tools.tts_tool._import_edge_tts")
def test_clamped_low(self, mock_edge, mock_prov, mock_cfg):
from tools.tts_tool import text_to_speech_tool
with patch("tools.tts_tool.asyncio.run"):
with patch("tools.tts_tool.os.path.exists", return_value=True):
with patch("tools.tts_tool.os.path.getsize", return_value=1000):
assert "success" in json.loads(text_to_speech_tool("test", speed=0.01))
@patch("tools.tts_tool._load_tts_config", return_value={})
@patch("tools.tts_tool._get_provider", return_value="edge")
@patch("tools.tts_tool._import_edge_tts")
def test_clamped_high(self, mock_edge, mock_prov, mock_cfg):
from tools.tts_tool import text_to_speech_tool
with patch("tools.tts_tool.asyncio.run"):
with patch("tools.tts_tool.os.path.exists", return_value=True):
with patch("tools.tts_tool.os.path.getsize", return_value=1000):
assert "success" in json.loads(text_to_speech_tool("test", speed=100.0))
class TestEdgeTTSRateConversion:
def test_rates(self):
for speed, expected in [(1.0, "+0%"), (1.5, "+50%"), (0.5, "-50%"), (2.0, "+100%"), (0.25, "-75%")]:
pct = int((speed - 1.0) * 100)
rate = f"+{pct}%" if pct >= 0 else f"{pct}%"
assert rate == expected

View File

@@ -179,10 +179,8 @@ async def _generate_edge_tts(text: str, output_path: str, tts_config: Dict[str,
_edge_tts = _import_edge_tts()
edge_config = tts_config.get("edge", {})
voice = edge_config.get("voice", DEFAULT_EDGE_VOICE)
speed = tts_config.get("_speed_override") or edge_config.get("speed", 1.0)
rate_pct = int((speed - 1.0) * 100)
rate_str = f"+{rate_pct}%" if rate_pct >= 0 else f"{rate_pct}%"
communicate = _edge_tts.Communicate(text, voice, rate=rate_str)
communicate = _edge_tts.Communicate(text, voice)
await communicate.save(output_path)
return output_path
@@ -264,14 +262,11 @@ def _generate_openai_tts(text: str, output_path: str, tts_config: Dict[str, Any]
OpenAIClient = _import_openai_client()
client = OpenAIClient(api_key=api_key, base_url=base_url)
try:
speed = tts_config.get("_speed_override") or oai_config.get("speed", 1.0)
speed = max(0.25, min(4.0, speed))
response = client.audio.speech.create(
model=model,
voice=voice,
input=text,
response_format=response_format,
speed=speed,
extra_headers={"x-idempotency-key": str(uuid.uuid4())},
)
@@ -310,7 +305,7 @@ def _generate_minimax_tts(text: str, output_path: str, tts_config: Dict[str, Any
mm_config = tts_config.get("minimax", {})
model = mm_config.get("model", DEFAULT_MINIMAX_MODEL)
voice_id = mm_config.get("voice_id", DEFAULT_MINIMAX_VOICE_ID)
speed = tts_config.get("_speed_override") or mm_config.get("speed", 1)
speed = mm_config.get("speed", 1)
vol = mm_config.get("vol", 1)
pitch = mm_config.get("pitch", 0)
base_url = mm_config.get("base_url", DEFAULT_MINIMAX_BASE_URL)
@@ -452,7 +447,6 @@ def _generate_neutts(text: str, output_path: str, tts_config: Dict[str, Any]) ->
def text_to_speech_tool(
text: str,
output_path: Optional[str] = None,
speed: Optional[float] = None,
) -> str:
"""
Convert text to speech audio.
@@ -480,9 +474,6 @@ def text_to_speech_tool(
text = text[:MAX_TEXT_LENGTH]
tts_config = _load_tts_config()
if speed is not None:
speed = max(0.25, min(4.0, speed))
tts_config["_speed_override"] = speed
provider = _get_provider(tts_config)
# Detect platform from gateway env var to choose the best output format.
@@ -975,10 +966,6 @@ TTS_SCHEMA = {
"output_path": {
"type": "string",
"description": "Optional custom file path to save the audio. Defaults to ~/.hermes/audio_cache/<timestamp>.mp3"
},
"speed": {
"type": "number",
"description": "Speech speed multiplier. 1.0 = normal, 0.5 = half speed, 2.0 = double. Range: 0.25-4.0. Edge TTS uses SSML rate, OpenAI uses native speed param, MiniMax passes directly."
}
},
"required": ["text"]
@@ -991,8 +978,7 @@ registry.register(
schema=TTS_SCHEMA,
handler=lambda args, **kw: text_to_speech_tool(
text=args.get("text", ""),
output_path=args.get("output_path"),
speed=args.get("speed")),
output_path=args.get("output_path")),
check_fn=check_tts_requirements,
emoji="🔊",
)