fix: calculator tool, markdown rendering, prompt guardrails, briefing notification

- Add sandboxed calculator tool to Timmy's toolkit so arithmetic questions get exact answers instead of LLM hallucinations
- Update system prompts (lite + full) to instruct Timmy to always use the calculator and never attempt multi-digit math in his head
- Add self-contradiction guard to both prompts ("commit to your facts")
- Render Timmy's chat responses as markdown via marked.js + DOMPurify instead of raw escaped text
- Suppress empty briefing notification on startup when there are 0 pending approval items
- Add calculator to session response sanitizer regex
- 18 new calculator tests, 2 updated briefing notification tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-26 09:35:59 -05:00
parent 05d4dc997c
commit 6e6b4355bb
8 changed files with 190 additions and 17 deletions
--- a/tests/test_briefing.py
+++ b/tests/test_briefing.py
@@ -233,11 +233,35 @@ def test_call_agent_falls_back_on_exception(engine):
 # ---------------------------------------------------------------------------

@pytest.mark.asyncio
-async def test_notify_briefing_ready_logs(caplog):
-    """notify_briefing_ready should log and call notifier.notify."""
-    from notifications.push import notify_briefing_ready, PushNotifier
+async def test_notify_briefing_ready_skips_when_no_approvals(caplog):
+    """notify_briefing_ready should NOT fire native notification with 0 approvals."""
+    from notifications.push import notify_briefing_ready
+
+    b = _make_briefing()  # approval_items=[]
+
+    with patch("notifications.push.notifier") as mock_notifier:
+        await notify_briefing_ready(b)
+        mock_notifier.notify.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_notify_briefing_ready_fires_when_approvals_exist():
+    """notify_briefing_ready should fire when there are pending approval items."""
+    from notifications.push import notify_briefing_ready
+    from timmy.briefing import ApprovalItem

    b = _make_briefing()
+    b.approval_items = [
+        ApprovalItem(
+            id="test-1",
+            title="Test approval",
+            description="A test item",
+            proposed_action="do something",
+            impact="low",
+            created_at=datetime.now(timezone.utc),
+            status="pending",
+        ),
+    ]

    with patch("notifications.push.notifier") as mock_notifier:
        await notify_briefing_ready(b)
--- a/tests/test_calculator.py
+++ b/tests/test_calculator.py
@@ -0,0 +1,85 @@
+"""Tests for the calculator tool."""
+
+from timmy.tools import calculator
+
+
+def test_basic_multiplication():
+    assert calculator("347 * 829") == "287663"
+
+
+def test_basic_addition():
+    assert calculator("100 + 200") == "300"
+
+
+def test_basic_division():
+    assert calculator("100 / 4") == "25.0"
+
+
+def test_integer_division():
+    assert calculator("100 // 3") == "33"
+
+
+def test_exponentiation():
+    assert calculator("2 ** 10") == "1024"
+
+
+def test_sqrt():
+    assert calculator("math.sqrt(17161)") == "131.0"
+
+
+def test_sqrt_non_perfect():
+    result = float(calculator("math.sqrt(2)"))
+    assert abs(result - 1.4142135623730951) < 1e-10
+
+
+def test_log_base_10():
+    result = float(calculator("math.log10(1000)"))
+    assert abs(result - 3.0) < 1e-10
+
+
+def test_log_natural():
+    result = float(calculator("math.log(math.e)"))
+    assert abs(result - 1.0) < 1e-10
+
+
+def test_trig_sin():
+    result = float(calculator("math.sin(math.pi / 2)"))
+    assert abs(result - 1.0) < 1e-10
+
+
+def test_abs_builtin():
+    assert calculator("abs(-42)") == "42"
+
+
+def test_round_builtin():
+    assert calculator("round(3.14159, 2)") == "3.14"
+
+
+def test_min_max_builtins():
+    assert calculator("min(3, 7, 1)") == "1"
+    assert calculator("max(3, 7, 1)") == "7"
+
+
+def test_complex_expression():
+    assert calculator("(347 * 829) + (100 / 4)") == "287688.0"
+
+
+def test_invalid_expression_returns_error():
+    result = calculator("not a valid expression")
+    assert result.startswith("Error evaluating")
+
+
+def test_no_builtins_access():
+    """Ensure dangerous builtins like __import__ are blocked."""
+    result = calculator("__import__('os').system('echo pwned')")
+    assert result.startswith("Error evaluating")
+
+
+def test_no_open_access():
+    result = calculator("open('/etc/passwd').read()")
+    assert result.startswith("Error evaluating")
+
+
+def test_division_by_zero():
+    result = calculator("1 / 0")
+    assert result.startswith("Error evaluating")