fix: calculator tool, markdown rendering, prompt guardrails, briefing notification

- Add sandboxed calculator tool to Timmy's toolkit so arithmetic questions
  get exact answers instead of LLM hallucinations
- Update system prompts (lite + full) to instruct Timmy to always use the
  calculator and never attempt multi-digit math in his head
- Add self-contradiction guard to both prompts ("commit to your facts")
- Render Timmy's chat responses as markdown via marked.js + DOMPurify
  instead of raw escaped text
- Suppress empty briefing notification on startup when there are 0
  pending approval items
- Add calculator to session response sanitizer regex
- 18 new calculator tests, 2 updated briefing notification tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Payne
2026-02-26 09:35:59 -05:00
parent 05d4dc997c
commit 6e6b4355bb
8 changed files with 190 additions and 17 deletions

View File

@@ -233,11 +233,35 @@ def test_call_agent_falls_back_on_exception(engine):
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_notify_briefing_ready_logs(caplog):
"""notify_briefing_ready should log and call notifier.notify."""
from notifications.push import notify_briefing_ready, PushNotifier
async def test_notify_briefing_ready_skips_when_no_approvals(caplog):
    """With zero approval items, notify_briefing_ready must not call notifier.notify."""
    from notifications.push import notify_briefing_ready

    # The briefing is used as built by the helper (approval_items=[]).
    empty_briefing = _make_briefing()

    with patch("notifications.push.notifier") as notifier_mock:
        await notify_briefing_ready(empty_briefing)
        notifier_mock.notify.assert_not_called()
@pytest.mark.asyncio
async def test_notify_briefing_ready_fires_when_approvals_exist():
    """notify_briefing_ready should fire when there are pending approval items.

    The briefing is given exactly one pending ApprovalItem, the module-level
    ``notifier`` in ``notifications.push`` is patched, and the test verifies
    that ``notify`` was invoked on it.
    """
    from notifications.push import notify_briefing_ready
    from timmy.briefing import ApprovalItem

    # Replace the briefing's approval items with a single pending item.
    b = _make_briefing()
    b.approval_items = [
        ApprovalItem(
            id="test-1",
            title="Test approval",
            description="A test item",
            proposed_action="do something",
            impact="low",
            created_at=datetime.now(timezone.utc),
            status="pending",
        ),
    ]

    with patch("notifications.push.notifier") as mock_notifier:
        await notify_briefing_ready(b)
        # Without this check the test would pass even if nothing was sent.
        mock_notifier.notify.assert_called_once()

85
tests/test_calculator.py Normal file
View File

@@ -0,0 +1,85 @@
"""Tests for the calculator tool."""
from timmy.tools import calculator
def test_basic_multiplication():
    """The product of two three-digit integers comes back as an exact string."""
    product = calculator("347 * 829")
    assert product == "287663"
def test_basic_addition():
    """Adding two integers returns their sum rendered as a string."""
    total = calculator("100 + 200")
    assert total == "300"
def test_basic_division():
    """True division is expected to yield a float-formatted string ("25.0")."""
    quotient = calculator("100 / 4")
    assert quotient == "25.0"
def test_integer_division():
    """Floor division is expected to yield an integer-formatted string."""
    floored = calculator("100 // 3")
    assert floored == "33"
def test_exponentiation():
    """The ** operator is supported: 2 ** 10 evaluates to "1024"."""
    power = calculator("2 ** 10")
    assert power == "1024"
def test_sqrt():
    """math.sqrt of a perfect square is returned as a float-formatted string."""
    root = calculator("math.sqrt(17161)")
    assert root == "131.0"
def test_sqrt_non_perfect():
    """math.sqrt(2) parses back to a float within 1e-10 of the reference value."""
    root = float(calculator("math.sqrt(2)"))
    deviation = abs(root - 1.4142135623730951)
    assert deviation < 1e-10
def test_log_base_10():
    """math.log10(1000) parses back to a float within 1e-10 of 3.0."""
    exponent = float(calculator("math.log10(1000)"))
    deviation = abs(exponent - 3.0)
    assert deviation < 1e-10
def test_log_natural():
    """math.log(math.e) parses back to a float within 1e-10 of 1.0."""
    natural_log = float(calculator("math.log(math.e)"))
    deviation = abs(natural_log - 1.0)
    assert deviation < 1e-10
def test_trig_sin():
    """math.sin(math.pi / 2) parses back to a float within 1e-10 of 1.0."""
    sine = float(calculator("math.sin(math.pi / 2)"))
    deviation = abs(sine - 1.0)
    assert deviation < 1e-10
def test_abs_builtin():
    """The abs builtin is usable inside an expression."""
    magnitude = calculator("abs(-42)")
    assert magnitude == "42"
def test_round_builtin():
    """The round builtin is usable with a digits argument."""
    rounded = calculator("round(3.14159, 2)")
    assert rounded == "3.14"
def test_min_max_builtins():
    """The min and max builtins are usable with several arguments."""
    cases = (
        ("min(3, 7, 1)", "1"),
        ("max(3, 7, 1)", "7"),
    )
    for expression, expected in cases:
        assert calculator(expression) == expected
def test_complex_expression():
    """A parenthesised mix of multiplication, division and addition evaluates correctly."""
    combined = calculator("(347 * 829) + (100 / 4)")
    assert combined == "287688.0"
def test_invalid_expression_returns_error():
    """Input that is not a valid expression yields an "Error evaluating" string."""
    outcome = calculator("not a valid expression")
    assert outcome.startswith("Error evaluating")
def test_no_builtins_access():
    """An expression that calls __import__ must come back as an error string."""
    outcome = calculator("__import__('os').system('echo pwned')")
    assert outcome.startswith("Error evaluating")
def test_no_open_access():
    """An expression that calls open() must come back as an error string."""
    outcome = calculator("open('/etc/passwd').read()")
    assert outcome.startswith("Error evaluating")
def test_division_by_zero():
    """Dividing by zero is reported as an error string."""
    outcome = calculator("1 / 0")
    assert outcome.startswith("Error evaluating")