# ---------------------------------------------------------------------------
# Deploy Sync Guard
# ---------------------------------------------------------------------------
# The scheduler passes keyword arguments to AIAgent() that may not exist in
# older installed versions.  When the installed run_agent.py diverges from the
# version the scheduler was written against, every cron job fails with a
# TypeError.  This guard reports that mismatch as a single RuntimeError that
# carries a fix command.
#
# A *passing* check is cached in _agent_interface_validated, so after the
# first success the guard is a no-op for the rest of the process.  A failing
# check is not cached: it is re-evaluated, and raises again, on every call
# until the installed code is fixed.

_agent_interface_validated = False

# Parameters the scheduler passes to AIAgent() in run_job().
# If you add a new kwarg to the AIAgent() call, add it here too.
# Only the keys are compared against the signature; the values are not used
# by the check.
_SCHEDULER_AGENT_KWARGS = {
    "tool_choice": "required",
    "skip_memory": True,
    "platform": "cron",
}


def _validate_agent_interface():
    """Verify AIAgent.__init__ accepts every kwarg the scheduler uses.

    A kwarg counts as accepted when ``AIAgent.__init__`` either declares a
    parameter of that name that can be passed by keyword, or declares a
    ``**kwargs`` catch-all.  Positional-only parameters do not count, since
    passing them by keyword raises ``TypeError``.

    Called from run_job(); once a check has passed, subsequent calls return
    immediately.

    Raises:
        RuntimeError: if ``run_agent`` cannot be imported, or if any key of
            ``_SCHEDULER_AGENT_KWARGS`` is not accepted by
            ``AIAgent.__init__``.  The message includes a remediation command.
    """
    global _agent_interface_validated
    if _agent_interface_validated:
        return

    try:
        from run_agent import AIAgent
    except ImportError as exc:
        raise RuntimeError(
            f"Deploy sync guard: cannot import AIAgent from run_agent: {exc}\n"
            "The installed hermes-agent package may be corrupted. "
            "Reinstall: pip install -e ~/.hermes/hermes-agent"
        ) from exc

    init = AIAgent.__init__
    if init is object.__init__:
        # No custom constructor.  object.__init__ advertises (*args, **kwargs)
        # but the class rejects any keyword argument at call time, so treat
        # it as accepting nothing.
        init_params = {}
    else:
        init_params = inspect.signature(init).parameters

    declared = [name for name in init_params if name != "self"]

    # A **kwargs catch-all swallows any keyword, so nothing can be "missing".
    takes_any_keyword = any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        for param in init_params.values()
    )
    by_keyword = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    keyword_capable = {
        name for name in declared if init_params[name].kind in by_keyword
    }

    if takes_any_keyword:
        missing = []
    else:
        missing = [
            kw for kw in _SCHEDULER_AGENT_KWARGS if kw not in keyword_capable
        ]

    if missing:
        raise RuntimeError(
            "Deploy sync guard: AIAgent.__init__() is missing parameters that "
            "the cron scheduler requires. This means the installed code is out "
            "of sync with the scheduler module.\n"
            f"  Missing parameters: {', '.join(missing)}\n"
            "  Expected by: cron/scheduler.py (run_job → AIAgent())\n"
            "  Fix: pip install -e ~/.hermes/hermes-agent --force-reinstall\n"
            "  Then restart the gateway."
        )

    # Only a successful validation is cached.
    _agent_interface_validated = True
    logger.info(
        "Deploy sync guard: AIAgent interface OK (%d params, %d scheduler deps)",
        len(declared), len(_SCHEDULER_AGENT_KWARGS),
    )
class TestDeploySyncGuard:
    """Tests for _validate_agent_interface() — the deploy sync guard.

    The guard caches its result in the module-global
    ``cron.scheduler._agent_interface_validated``.  Each test starts with the
    flag cleared and the original value is restored afterwards, so these
    tests neither depend on nor leak state into the rest of the suite.
    """

    def setup_method(self):
        """Clear the cached flag so the check actually runs."""
        import cron.scheduler as sched_mod

        self._sched = sched_mod
        self._saved_flag = sched_mod._agent_interface_validated
        sched_mod._agent_interface_validated = False

    def teardown_method(self):
        """Restore whatever flag value the module had before the test."""
        self._sched._agent_interface_validated = self._saved_flag

    def test_passes_when_all_params_present(self):
        """Validation passes when AIAgent accepts every scheduler kwarg."""
        # Runs against whatever run_agent.AIAgent is importable here.
        # Should not raise.
        self._sched._validate_agent_interface()
        assert self._sched._agent_interface_validated is True

    def test_fails_when_param_missing(self):
        """Validation raises RuntimeError when AIAgent is missing a required param."""
        from unittest.mock import MagicMock, patch

        # Build a fake AIAgent class whose __init__ lacks 'tool_choice'.
        class FakeAIAgent:
            def __init__(self, model="", max_iterations=90, quiet_mode=False,
                         disabled_toolsets=None, skip_memory=False, platform=None,
                         session_id=None, session_db=None):
                pass

        fake_module = MagicMock()
        fake_module.AIAgent = FakeAIAgent

        with patch.dict("sys.modules", {"run_agent": fake_module}):
            with pytest.raises(RuntimeError, match="Missing parameters: tool_choice"):
                self._sched._validate_agent_interface()

        # A failed check must not be cached as a success.
        assert self._sched._agent_interface_validated is False

    def test_cached_after_first_run(self):
        """Second call is a no-op (uses cached flag)."""
        from unittest.mock import MagicMock, patch

        # An AIAgent that accepts none of the scheduler kwargs: if the guard
        # re-ran its check it would raise, so not raising proves the cached
        # flag short-circuits the call.
        class IncompatibleAIAgent:
            def __init__(self):
                pass

        fake_module = MagicMock()
        fake_module.AIAgent = IncompatibleAIAgent

        self._sched._agent_interface_validated = True
        with patch.dict("sys.modules", {"run_agent": fake_module}):
            self._sched._validate_agent_interface()
        assert self._sched._agent_interface_validated is True