diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html index 1d087d4..de527f7 100644 --- a/src/dashboard/templates/base.html +++ b/src/dashboard/templates/base.html @@ -14,6 +14,8 @@ {% block extra_styles %}{% endblock %} + +
diff --git a/src/dashboard/templates/partials/chat_message.html b/src/dashboard/templates/partials/chat_message.html index 5d48134..d139983 100644 --- a/src/dashboard/templates/partials/chat_message.html +++ b/src/dashboard/templates/partials/chat_message.html @@ -5,8 +5,16 @@ {% if response %}
TIMMY // {{ timestamp }}
-
{{ response | e }}
+
{{ response | e }}
+ {% elif error %}
SYSTEM // {{ timestamp }}
diff --git a/src/notifications/push.py b/src/notifications/push.py index 80f9f88..26017e4 100644 --- a/src/notifications/push.py +++ b/src/notifications/push.py @@ -124,14 +124,21 @@ notifier = PushNotifier() async def notify_briefing_ready(briefing) -> None: - """Placeholder: notify the owner that a new morning briefing is ready. + """Notify the owner that a new morning briefing is ready. - Logs to console now. Wire to real push (APNs/Pushover) later. + Only triggers a native macOS popup when there are pending approval items. + Briefings with 0 approvals are still logged but don't interrupt the user + with a notification that leads to an empty-looking page. Args: briefing: A timmy.briefing.Briefing instance. """ n_approvals = len(briefing.approval_items) if briefing.approval_items else 0 + + if n_approvals == 0: + logger.info("Briefing ready but no pending approvals — skipping native notification") + return + message = ( f"Your morning briefing is ready. " f"{n_approvals} item(s) await your approval." diff --git a/src/timmy/prompts.py b/src/timmy/prompts.py index 3c6428e..e7ce2e0 100644 --- a/src/timmy/prompts.py +++ b/src/timmy/prompts.py @@ -21,7 +21,12 @@ Rules: - Remember what the user tells you during our conversation. - If you don't know something, say so honestly. - Use the user's name if you know it. -- Do simple math in your head. Don't reach for tools. +- When you state a fact, commit to it. Never contradict a correct statement you + just made in the same response. If uncertain, express uncertainty at the start — + never state something confidently and then immediately undermine it. +- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math. + If asked to compute anything (multiply, divide, square root, exponents, etc.), + tell the user you need a calculator tool to give an exact answer. Sir, affirmative.""" @@ -57,15 +62,17 @@ user's digital sovereignty. ### When NOT to use tools: - Identity questions → Answer directly - General knowledge → Answer from training -- Simple math → Calculate mentally - Greetings → Respond conversationally ### When TO use tools: +- **calculator** — ANY arithmetic: multiplication, division, square roots, exponents, + percentages, logarithms, etc. NEVER attempt math in your head — always call this tool. + Example: calculator("347 * 829") or calculator("math.sqrt(17161)") - **web_search** — Current events, real-time data, news - **read_file** — User explicitly requests file reading - **write_file** — User explicitly requests saving content -- **python** — Complex calculations, code execution +- **python** — Code execution, data processing (NOT for simple arithmetic — use calculator) - **shell** — System operations (explicit user request) - **memory_search** — "Have we talked about this before?", finding past context @@ -74,6 +81,9 @@ user's digital sovereignty. - Never narrate your reasoning process. Just give the answer. - Never show raw tool call JSON or function syntax in responses. - Use the user's name if known. +- When you state a fact, commit to it. Never contradict a correct statement you + just made in the same response. If uncertain, express uncertainty at the start — + never state something confidently and then immediately undermine it. Sir, affirmative.""" @@ -101,12 +111,13 @@ you are operational and running locally.""" TOOL_USAGE_GUIDE = """ DECISION ORDER: -1. Can I answer from training data? → Answer directly (NO TOOL) -2. Is this about past conversations? → memory_search -3. Is this current/real-time info? → web_search -4. Did user request file operations? → file tools -5. Requires calculation/code? → python -6. System command requested? → shell +1. Is this arithmetic or math? → calculator (ALWAYS — never compute in your head) +2. Can I answer from training data? → Answer directly (NO TOOL) +3. Is this about past conversations? → memory_search +4. Is this current/real-time info? → web_search +5. Did user request file operations? → file tools +6. Requires code execution? → python +7. System command requested? → shell MEMORY SEARCH TRIGGERS: - "Have we discussed..." diff --git a/src/timmy/session.py b/src/timmy/session.py index 79965f8..cfaff04 100644 --- a/src/timmy/session.py +++ b/src/timmy/session.py @@ -32,7 +32,7 @@ _TOOL_CALL_JSON = re.compile( # Matches function-call-style text: memory_search(query="...") etc. _FUNC_CALL_TEXT = re.compile( - r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files)' + r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files|calculator)' r'\s*\([^)]*\)', ) diff --git a/src/timmy/tools.py b/src/timmy/tools.py index 3bd992d..d680d1a 100644 --- a/src/timmy/tools.py +++ b/src/timmy/tools.py @@ -26,6 +26,7 @@ Tools are assigned to personas based on their specialties: from __future__ import annotations import logging +import math from dataclasses import dataclass, field from datetime import datetime, timezone from pathlib import Path @@ -107,6 +108,33 @@ def get_tool_stats(agent_id: str | None = None) -> dict: return all_stats +def calculator(expression: str) -> str: + """Evaluate a mathematical expression and return the exact result. + + Use this tool for ANY arithmetic: multiplication, division, square roots, + exponents, percentages, logarithms, trigonometry, etc. + + Args: + expression: A valid Python math expression, e.g. '347 * 829', + 'math.sqrt(17161)', '2**10', 'math.log(100, 10)'. + + Returns: + The exact result as a string. + """ + # Only expose math functions — no builtins, no file/os access + allowed_names = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")} + allowed_names["math"] = math # Support math.sqrt(), math.pi, etc. + allowed_names["abs"] = abs + allowed_names["round"] = round + allowed_names["min"] = min + allowed_names["max"] = max + try: + result = eval(expression, {"__builtins__": {}}, allowed_names) # noqa: S307 + return str(result) + except Exception as e: + return f"Error evaluating '{expression}': {e}" + + def create_research_tools(base_dir: str | Path | None = None): """Create tools for research personas (Echo). @@ -280,13 +308,16 @@ def create_full_toolkit(base_dir: str | Path | None = None): toolkit.register(file_tools.save_file, name="write_file") toolkit.register(file_tools.list_files, name="list_files") + # Calculator — exact arithmetic (never let the LLM guess) + toolkit.register(calculator, name="calculator") + # Memory search - semantic recall try: from timmy.semantic_memory import memory_search toolkit.register(memory_search, name="memory_search") except Exception: logger.debug("Memory search not available") - + return toolkit @@ -371,6 +402,11 @@ def get_all_available_tools() -> dict[str, dict]: "description": "List files in a directory", "available_in": ["echo", "seer", "forge", "quill", "mace", "helm", "timmy"], }, + "calculator": { + "name": "Calculator", + "description": "Evaluate mathematical expressions with exact results", + "available_in": ["timmy"], + }, } # ── Git tools ───────────────────────────────────────────────────────────── diff --git a/tests/test_briefing.py b/tests/test_briefing.py index a3f4134..c239bf5 100644 --- a/tests/test_briefing.py +++ b/tests/test_briefing.py @@ -233,11 +233,35 @@ def test_call_agent_falls_back_on_exception(engine): # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_notify_briefing_ready_logs(caplog): - """notify_briefing_ready should log and call notifier.notify.""" - from notifications.push import notify_briefing_ready, PushNotifier +async def test_notify_briefing_ready_skips_when_no_approvals(caplog): + """notify_briefing_ready should NOT fire native notification with 0 approvals.""" + from notifications.push import notify_briefing_ready + + b = _make_briefing() # approval_items=[] + + with patch("notifications.push.notifier") as mock_notifier: + await notify_briefing_ready(b) + mock_notifier.notify.assert_not_called() + + +@pytest.mark.asyncio +async def test_notify_briefing_ready_fires_when_approvals_exist(): + """notify_briefing_ready should fire when there are pending approval items.""" + from notifications.push import notify_briefing_ready + from timmy.briefing import ApprovalItem b = _make_briefing() + b.approval_items = [ + ApprovalItem( + id="test-1", + title="Test approval", + description="A test item", + proposed_action="do something", + impact="low", + created_at=datetime.now(timezone.utc), + status="pending", + ), + ] with patch("notifications.push.notifier") as mock_notifier: await notify_briefing_ready(b) diff --git a/tests/test_calculator.py b/tests/test_calculator.py new file mode 100644 index 0000000..86d793b --- /dev/null +++ b/tests/test_calculator.py @@ -0,0 +1,85 @@ +"""Tests for the calculator tool.""" + +from timmy.tools import calculator + + +def test_basic_multiplication(): + assert calculator("347 * 829") == "287663" + + +def test_basic_addition(): + assert calculator("100 + 200") == "300" + + +def test_basic_division(): + assert calculator("100 / 4") == "25.0" + + +def test_integer_division(): + assert calculator("100 // 3") == "33" + + +def test_exponentiation(): + assert calculator("2 ** 10") == "1024" + + +def test_sqrt(): + assert calculator("math.sqrt(17161)") == "131.0" + + +def test_sqrt_non_perfect(): + result = float(calculator("math.sqrt(2)")) + assert abs(result - 1.4142135623730951) < 1e-10 + + +def test_log_base_10(): + result = float(calculator("math.log10(1000)")) + assert abs(result - 3.0) < 1e-10 + + +def test_log_natural(): + result = float(calculator("math.log(math.e)")) + assert abs(result - 1.0) < 1e-10 + + +def test_trig_sin(): + result = float(calculator("math.sin(math.pi / 2)")) + assert abs(result - 1.0) < 1e-10 + + +def test_abs_builtin(): + assert calculator("abs(-42)") == "42" + + +def test_round_builtin(): + assert calculator("round(3.14159, 2)") == "3.14" + + +def test_min_max_builtins(): + assert calculator("min(3, 7, 1)") == "1" + assert calculator("max(3, 7, 1)") == "7" + + +def test_complex_expression(): + assert calculator("(347 * 829) + (100 / 4)") == "287688.0" + + +def test_invalid_expression_returns_error(): + result = calculator("not a valid expression") + assert result.startswith("Error evaluating") + + +def test_no_builtins_access(): + """Ensure dangerous builtins like __import__ are blocked.""" + result = calculator("__import__('os').system('echo pwned')") + assert result.startswith("Error evaluating") + + +def test_no_open_access(): + result = calculator("open('/etc/passwd').read()") + assert result.startswith("Error evaluating") + + +def test_division_by_zero(): + result = calculator("1 / 0") + assert result.startswith("Error evaluating")