Compare commits

..

1 Commits

Author SHA1 Message Date
Timmy Time
d3ff0873c5 Fix #372: Runtime-aware cron prompts with provider mismatch detection
Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m27s
Injects runtime context so agents know capabilities.
Detects provider mismatches and warns operators.
Fixes #372
2026-04-14 11:39:10 -04:00
4 changed files with 63 additions and 254 deletions

View File

@@ -39,4 +39,4 @@ __all__ = [
"trigger_job",
"tick",
"JOBS_FILE",
]
]

View File

@@ -186,14 +186,7 @@ _SCRIPT_FAILURE_PHRASES = (
"unable to execute",
"permission denied",
"no such file",
"no such file or directory",
"command not found",
"traceback",
"hermes binary not found",
"hermes not found",
"ssh: connect to host",
"connection timed out",
"host key verification failed",
)
@@ -552,7 +545,24 @@ def _run_job_script(script_path: str) -> tuple[bool, str]:
return False, f"Script execution failed: {exc}"
def _build_job_prompt(job: dict) -> str:
# Provider family -> lowercase aliases that identify it, both as an active
# provider name and as a mention inside a free-text job prompt.
_PROVIDER_ALIASES = {
    "ollama": {"ollama", "localhost:11434"},
    "anthropic": {"anthropic", "claude"},
    "nous": {"nous", "mimo"},
    "openrouter": {"openrouter"},
    "openai": {"openai", "gpt"},
    "gemini": {"gemini", "google"},
}
# Model-name prefixes ("<prefix>/<model>") that mark a model as cloud-hosted.
_CP = frozenset({"nous", "openrouter", "anthropic", "openai", "zai", "kimi", "gemini", "minimax"})


def _classify_runtime(provider, model):
    """Classify the runtime as ``"cloud"``, ``"local"`` or ``"unknown"``.

    Rules, applied in order to the stripped, lower-cased inputs:

    1. Any provider other than ``ollama``/``local`` is ``"cloud"``.
    2. A model of the form ``<prefix>/<name>`` whose prefix is in ``_CP``
       is ``"cloud"`` (this also overrides an ``ollama``/``local`` provider).
    3. An ``ollama``/``local`` provider, or a model with no provider at all,
       is ``"local"``.
    4. Otherwise (nothing supplied) the result is ``"unknown"``.

    ``None`` is accepted for either argument and treated as empty.
    """
    p = (provider or "").strip().lower()
    m = (model or "").strip().lower()
    if p and p not in ("ollama", "local"):
        return "cloud"
    if "/" in m and m.split("/")[0] in _CP:
        return "cloud"
    if p in ("ollama", "local") or (not p and m):
        return "local"
    return "unknown"


def _detect_provider_mismatch(prompt, active):
    """Return the provider family a prompt mentions that is not the active one.

    Args:
        prompt: Free-text job prompt to scan.
        active: Name of the provider the job will actually run on.

    Returns:
        The first family in ``_PROVIDER_ALIASES`` (other than the active
        provider's family) with an alias mentioned in ``prompt``, or ``None``
        when there is no such mention, when either argument is empty, or when
        ``active`` does not map to a known family.
    """
    if not active or not prompt:
        return None

    import re  # local import: keeps this helper self-contained

    prompt_lc = prompt.lower()
    active_lc = active.lower().strip()
    active_group = next(
        (
            group
            for group, aliases in _PROVIDER_ALIASES.items()
            if active_lc in aliases or active_lc.startswith(group)
        ),
        None,
    )
    if not active_group:
        return None

    def _mentions(alias):
        # Match the alias as its own token, not as a fragment of a longer
        # word: a plain substring test flags "asynchronous" or "ominous" as a
        # reference to "nous".  A trailing digit is still allowed so that
        # spellings such as "gpt4" keep matching.
        pattern = rf"(?<![a-z0-9]){re.escape(alias)}(?![a-z])"
        return re.search(pattern, prompt_lc) is not None

    return next(
        (
            group
            for group, aliases in _PROVIDER_ALIASES.items()
            if group != active_group and any(_mentions(alias) for alias in aliases)
        ),
        None,
    )
def _build_job_prompt(job: dict, *, runtime_model="", runtime_provider="") -> str:
"""Build the effective prompt for a cron job, optionally loading one or more skills first."""
prompt = job.get("prompt", "")
skills = job.get("skills")
@@ -583,6 +593,16 @@ def _build_job_prompt(job: dict) -> str:
f"{prompt}"
)
_rb = ""
if runtime_model or runtime_provider:
_k = _classify_runtime(runtime_provider, runtime_model)
_n = []
if runtime_model: _n.append(f"MODEL: {runtime_model}")
if runtime_provider: _n.append(f"PROVIDER: {runtime_provider}")
if _k=="local": _n.append("RUNTIME: local — access to machine, Ollama, SSH")
elif _k=="cloud": _n.append("RUNTIME: cloud — NO local access, NO SSH")
if _n: _rb = "[SYSTEM: " + "; ".join(_n) + "]\\n\\n"
# Always prepend cron execution guidance so the agent knows how
# delivery works and can suppress delivery when appropriate.
cron_hint = (
@@ -604,7 +624,7 @@ def _build_job_prompt(job: dict) -> str:
"\"[SCRIPT_FAILED]: forge.alexanderwhitestone.com timed out\" "
"\"[SCRIPT_FAILED]: script exited with code 1\".]\\n\\n"
)
prompt = cron_hint + prompt
prompt = _rb + cron_hint + prompt
if skills is None:
legacy = job.get("skill")
skills = [legacy] if legacy else []
@@ -674,7 +694,18 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
job_id = job["id"]
job_name = job["name"]
prompt = _build_job_prompt(job)
_em = job.get("model") or os.getenv("HERMES_MODEL") or ""
_ep = os.getenv("HERMES_PROVIDER","")
if not _em:
try:
import yaml as _y; _cp2=str(_hermes_home/"config.yaml")
if os.path.exists(_cp2):
with open(_cp2) as _f: _ce=_y.safe_load(_f) or {}
_mc=_ce.get("model",{})
_em = _mc if isinstance(_mc,str) else (_mc.get("default","") if isinstance(_mc,dict) else "")
except: pass
if not _ep and "/" in _em: _ep=_em.split("/")[0]
prompt = _build_job_prompt(job, runtime_model=_em, runtime_provider=_ep)
origin = _resolve_origin(job)
_cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"
@@ -786,6 +817,10 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
message = format_runtime_provider_error(exc)
raise RuntimeError(message) from exc
_rp = runtime.get("provider","") or ""
_mm = _detect_provider_mismatch(job.get("prompt",""), _rp)
if _mm: logger.warning("Job '%s' refs '%s' but provider is '%s'", job_name, _mm, _rp)
from agent.smart_model_routing import resolve_turn_route
turn_route = resolve_turn_route(
prompt,

View File

@@ -1,243 +0,0 @@
"""SSH Dispatch — validated remote hermes execution for cron jobs.
Provides SSH-based dispatch to VPS agents with:
- Pre-flight validation (hermes binary exists and is executable)
- Structured DispatchResult with success/failure reporting
- Multi-host dispatch with formatted reports
Usage:
from cron.ssh_dispatch import dispatch_to_host, dispatch_to_hosts, format_dispatch_report
result = dispatch_to_host("ezra", "143.198.27.163", "Check the beacon repo for open issues")
if not result.success:
print(result.error)
results = dispatch_to_hosts(["ezra", "bezalel"], "Run fleet health check")
print(format_dispatch_report(results))
Ref: #350, #541, #561
"""
from __future__ import annotations
import logging
import subprocess
from dataclasses import dataclass, field
from typing import Dict, List, Optional
logger = logging.getLogger(__name__)
# Known VPS hosts, keyed by short host name -> IP address. Used by
# dispatch_to_hosts() whenever the caller does not pass its own host_map.
# NOTE(review): an env/config override was mentioned here previously, but no
# such mechanism is visible in this module — confirm where it lives.
DEFAULT_HOSTS: Dict[str, str] = {
"ezra": "143.198.27.163",
"bezalel": "159.203.146.185",
}
# SSH options for non-interactive, fast-fail connections. These are passed on
# every ssh invocation made by this module (probe and dispatch).
# NOTE(review): StrictHostKeyChecking=accept-new presumably trusts a host's
# key on first contact — confirm that is acceptable for these hosts.
_SSH_OPTS = [
"-o", "ConnectTimeout=10",
"-o", "StrictHostKeyChecking=accept-new",
"-o", "BatchMode=yes",
"-o", "LogLevel=ERROR",
]
# Paths to check for hermes binary on remote, tried in this order by
# probe_hermes(). The "~" entries are left for the remote shell to expand.
_HERMES_CHECK_PATHS = [
"~/.local/bin/hermes",
"/usr/local/bin/hermes",
"~/.hermes/bin/hermes",
]
@dataclass
class DispatchResult:
    """Outcome of one SSH dispatch attempt against a single remote host."""

    # Short host name and the address that was (or would have been) contacted.
    host: str
    address: str
    # True only when the dispatch is considered successful.
    success: bool
    # Captured output and error text; both default to empty.
    output: str = ""
    error: str = ""
    # Whether a hermes binary was located, and the path that was used.
    hermes_found: bool = False
    hermes_path: str = ""
    # Exit status of the remote command; stays at -1 when none was recorded.
    exit_code: int = -1

    @property
    def summary(self) -> str:
        """One-line ``[OK]``/``[FAIL]`` status; failures include the error."""
        target = f"{self.host} ({self.address})"
        return f"[OK] {target}" if self.success else f"[FAIL] {target}: {self.error}"
def probe_hermes(host: str, address: str) -> tuple[bool, str]:
    """Check if hermes binary exists and is executable on remote host.

    Runs a single ssh command that tests each entry of
    ``_HERMES_CHECK_PATHS`` in order and prints the first executable one.

    Args:
        host: Short host name, used only in log messages.
        address: Address handed to ``ssh``.

    Returns:
        ``(found, path)``. ``path`` is the single path printed by the remote
        shell when ``found`` is true, otherwise ``""``. Timeouts and any other
        failure to run ssh are logged as warnings and reported as
        ``(False, "")``.
    """
    # Each candidate is wrapped in "{ ...; }" so the chain contains only "||".
    # In a flat "a && echo a || b && echo b" list, && and || have equal
    # precedence and associate left, so once one path is found every later
    # "echo" runs as well and several paths are printed.
    groups = [f"{{ test -x {p} && echo {p}; }}" for p in _HERMES_CHECK_PATHS]
    check_cmds = " || ".join([*groups, "echo NOTFOUND"])
    remote_cmd = f"bash -c '{check_cmds}'"
    try:
        result = subprocess.run(
            ["ssh", address, *_SSH_OPTS, remote_cmd],
            capture_output=True,
            text=True,
            timeout=15,
        )
    except subprocess.TimeoutExpired:
        logger.warning("SSH probe timed out for %s", host)
        return False, ""
    except Exception as e:
        logger.warning("SSH probe failed for %s: %s", host, e)
        return False, ""

    # The probe prints its verdict last; use only the final non-empty line so
    # the returned path is always a single line that is safe to splice into a
    # later command.
    printed = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    verdict = printed[-1] if printed else ""
    if verdict and verdict != "NOTFOUND":
        return True, verdict
    return False, ""
def dispatch_to_host(
    host: str,
    address: str,
    prompt: str,
    timeout: int = 300,
    validate: bool = True,
) -> DispatchResult:
    """Dispatch a prompt to a remote hermes instance via SSH.

    The prompt is piped on stdin to ``<hermes> chat --quiet`` on the remote
    host.

    Args:
        host: Hostname (ezra, bezalel, etc.)
        address: IP address or hostname
        prompt: The prompt/task to dispatch
        timeout: SSH timeout in seconds
        validate: Whether to probe for hermes binary first. When false,
            ``~/.local/bin/hermes`` is assumed.

    Returns:
        DispatchResult with success/failure details. Success means the remote
        command exited with status 0; ``output`` holds at most the first 500
        characters of its stdout. Timeouts and other failures to run ssh are
        returned as failed results rather than raised.
    """
    import shlex  # local import: only this function needs it

    # Pre-flight validation
    if validate:
        found, path = probe_hermes(host, address)
        if not found:
            return DispatchResult(
                host=host,
                address=address,
                success=False,
                error="hermes binary not found on remote host",
                hermes_found=False,
            )
    else:
        found, path = True, "~/.local/bin/hermes"

    # Build the dispatch command: hermes chat in quiet mode, prompt on stdin.
    # printf '%s\n' emits the prompt verbatim, whereas echo may treat a
    # leading "-n" as an option or expand backslash escapes depending on the
    # remote shell. The path is deliberately left unquoted so "~" expands.
    remote_cmd = f"printf '%s\\n' {shlex.quote(prompt)} | {path} chat --quiet"
    try:
        result = subprocess.run(
            ["ssh", address, *_SSH_OPTS, remote_cmd],
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return DispatchResult(
            host=host,
            address=address,
            success=False,
            error=f"SSH dispatch timed out after {timeout}s",
            hermes_found=found,
            hermes_path=path,
        )
    except Exception as e:
        return DispatchResult(
            host=host,
            address=address,
            success=False,
            error=f"SSH dispatch failed: {e}",
            hermes_found=found,
            hermes_path=path,
        )

    success = result.returncode == 0
    error = ""
    if not success:
        # Fall back to the exit code when stderr is empty or only whitespace,
        # so a failed result never carries an empty error message.
        error = (result.stderr or "").strip() or f"exit code {result.returncode}"
    return DispatchResult(
        host=host,
        address=address,
        success=success,
        output=result.stdout.strip()[:500],  # Truncate long output
        error=error,
        hermes_found=found,
        hermes_path=path,
        exit_code=result.returncode,
    )
def dispatch_to_hosts(
    hosts: List[str],
    prompt: str,
    host_map: Optional[Dict[str, str]] = None,
    timeout: int = 300,
) -> List[DispatchResult]:
    """Send one prompt to several hosts, one after another.

    Args:
        hosts: Host names to dispatch to, processed in the given order.
        prompt: The prompt/task to dispatch.
        host_map: Host name -> address mapping; ``DEFAULT_HOSTS`` is used
            when this is omitted or empty.
        timeout: SSH timeout per host, in seconds.

    Returns:
        One DispatchResult per entry in ``hosts``, in the same order. A host
        with no known address yields a failed result with address
        ``"unknown"`` instead of being dispatched.
    """
    lookup = host_map if host_map else DEFAULT_HOSTS
    outcomes = []
    for name in hosts:
        target = lookup.get(name)
        if target:
            outcome = dispatch_to_host(name, target, prompt, timeout=timeout)
            outcomes.append(outcome)
            # Only real dispatch attempts are logged, not unknown hosts.
            logger.info(outcome.summary)
        else:
            outcomes.append(
                DispatchResult(
                    host=name,
                    address="unknown",
                    success=False,
                    error=f"Unknown host: {name}",
                )
            )
    return outcomes
def format_dispatch_report(results: "List[DispatchResult]") -> str:
    """Format multi-host dispatch results as a readable report.

    Args:
        results: Dispatch results to summarise. Only the ``success``,
            ``host``, ``address``, ``hermes_path``, ``output`` and ``error``
            attributes are read.

    Returns:
        A newline-joined report: a header, a totals line, then one entry per
        result. Returns ``"No dispatch results."`` for an empty list.
    """
    if not results:
        return "No dispatch results."
    lines = ["SSH Dispatch Report", "=" * 40, ""]
    ok_count = sum(1 for r in results if r.success)
    fail_count = len(results) - ok_count
    lines.append(f"Total: {len(results)} | OK: {ok_count} | FAIL: {fail_count}")
    lines.append("")
    for r in results:
        # Same markers as DispatchResult.summary; previously both branches
        # were identical, so success and failure looked the same.
        status = "[OK]" if r.success else "[FAIL]"
        lines.append(f" {status} {r.host} ({r.address})")
        if r.hermes_path:
            lines.append(f" hermes: {r.hermes_path}")
        if r.success and r.output:
            preview = r.output[:100]
            if len(r.output) > 100:
                # Mark the preview as cut only when something was dropped.
                preview += "..."
            lines.append(f" output: {preview}")
        if not r.success:
            lines.append(f" error: {r.error}")
        lines.append("")
    return "\n".join(lines)

17
tests/test_372_runtime.py Normal file
View File

@@ -0,0 +1,17 @@
"""Tests for #372."""
import sys; sys.path.insert(0, str(__import__('pathlib').Path(__file__).resolve().parent.parent))
def _imp():
    """Load ``cron/scheduler.py`` from the repository root as module ``cs``.

    The file is executed directly from its path rather than imported as a
    package. Loading is best-effort: if executing it raises, the partially
    initialised module is still returned, holding whatever was defined before
    the failure, and a ``RuntimeWarning`` reports why.

    Returns:
        The (possibly partially initialised) module object.
    """
    import importlib.util as iu
    import pathlib
    import warnings

    target = pathlib.Path(__file__).resolve().parent.parent / "cron" / "scheduler.py"
    spec = iu.spec_from_file_location("cs", str(target))
    module = iu.module_from_spec(spec)
    try:
        spec.loader.exec_module(module)
    except Exception as exc:
        # Not a bare except: Ctrl-C and SystemExit must still propagate, and
        # the reason for a partial load should be visible in the test output.
        warnings.warn(f"partial load of {target}: {exc!r}", RuntimeWarning)
    return module
M=_imp()
class Test372Runtime:
    """Checks for the #372 runtime classification and mismatch helpers."""

    def test_local(self):
        """An ollama provider is classified as local."""
        kind = M._classify_runtime("ollama", "q")
        assert kind == "local"

    def test_cloud(self):
        """A nous provider is classified as cloud."""
        kind = M._classify_runtime("nous", "m")
        assert kind == "cloud"

    def test_mismatch(self):
        """A prompt naming Ollama while running on nous is reported."""
        found = M._detect_provider_mismatch("Check Ollama", "nous")
        assert found == "ollama"

    def test_none(self):
        """A prompt naming only the active provider is not a mismatch."""
        found = M._detect_provider_mismatch("Check Nous", "nous")
        assert found is None

    def test_cloud_ctx(self):
        """The built prompt mentions the cloud runtime for a cloud provider."""
        built = M._build_job_prompt({"p": "x"}, runtime_model="n/m", runtime_provider="nous")
        assert "cloud" in built.lower()
if __name__=="__main__": __import__('pytest').main([__file__,"-v"])