forked from Rockachopa/Timmy-time-dashboard
Merge pull request #48 from AlexanderWhitestone/fix/timmy-startup-and-stability
fix: Timmy QA bugs — calculator, markdown, prompt guardrails, briefing
This commit is contained in:
@@ -14,6 +14,8 @@
|
||||
<link rel="stylesheet" href="/static/style.css?v=4" />
|
||||
{% block extra_styles %}{% endblock %}
|
||||
<script src="https://unpkg.com/htmx.org@2.0.3" integrity="sha384-0895/pl2MU10Hqc6jd4RvrthNlDiE9U1tWmX7WRESftEDRosgxNsQG/Ze9YMRzHq" crossorigin="anonymous"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/marked@15.0.7/marked.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/dompurify@3.2.4/dist/purify.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<header class="mc-header">
|
||||
|
||||
@@ -5,8 +5,16 @@
|
||||
{% if response %}
|
||||
<div class="chat-message agent">
|
||||
<div class="msg-meta">TIMMY // {{ timestamp }}</div>
|
||||
<div class="msg-body">{{ response | e }}</div>
|
||||
<div class="msg-body timmy-md">{{ response | e }}</div>
|
||||
</div>
|
||||
<script>
|
||||
(function() {
|
||||
var el = document.currentScript.previousElementSibling.querySelector('.timmy-md');
|
||||
if (el && typeof marked !== 'undefined' && typeof DOMPurify !== 'undefined') {
|
||||
el.innerHTML = DOMPurify.sanitize(marked.parse(el.textContent));
|
||||
}
|
||||
})();
|
||||
</script>
|
||||
{% elif error %}
|
||||
<div class="chat-message error-msg">
|
||||
<div class="msg-meta">SYSTEM // {{ timestamp }}</div>
|
||||
|
||||
@@ -124,14 +124,21 @@ notifier = PushNotifier()
|
||||
|
||||
|
||||
async def notify_briefing_ready(briefing) -> None:
|
||||
"""Placeholder: notify the owner that a new morning briefing is ready.
|
||||
"""Notify the owner that a new morning briefing is ready.
|
||||
|
||||
Logs to console now. Wire to real push (APNs/Pushover) later.
|
||||
Only triggers a native macOS popup when there are pending approval items.
|
||||
Briefings with 0 approvals are still logged but don't interrupt the user
|
||||
with a notification that leads to an empty-looking page.
|
||||
|
||||
Args:
|
||||
briefing: A timmy.briefing.Briefing instance.
|
||||
"""
|
||||
n_approvals = len(briefing.approval_items) if briefing.approval_items else 0
|
||||
|
||||
if n_approvals == 0:
|
||||
logger.info("Briefing ready but no pending approvals — skipping native notification")
|
||||
return
|
||||
|
||||
message = (
|
||||
f"Your morning briefing is ready. "
|
||||
f"{n_approvals} item(s) await your approval."
|
||||
|
||||
@@ -21,7 +21,12 @@ Rules:
|
||||
- Remember what the user tells you during our conversation.
|
||||
- If you don't know something, say so honestly.
|
||||
- Use the user's name if you know it.
|
||||
- Do simple math in your head. Don't reach for tools.
|
||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||
just made in the same response. If uncertain, express uncertainty at the start —
|
||||
never state something confidently and then immediately undermine it.
|
||||
- NEVER attempt arithmetic in your head — LLMs are unreliable at multi-digit math.
|
||||
If asked to compute anything (multiply, divide, square root, exponents, etc.),
|
||||
tell the user you need a calculator tool to give an exact answer.
|
||||
|
||||
Sir, affirmative."""
|
||||
|
||||
@@ -57,15 +62,17 @@ user's digital sovereignty.
|
||||
### When NOT to use tools:
|
||||
- Identity questions → Answer directly
|
||||
- General knowledge → Answer from training
|
||||
- Simple math → Calculate mentally
|
||||
- Greetings → Respond conversationally
|
||||
|
||||
### When TO use tools:
|
||||
|
||||
- **calculator** — ANY arithmetic: multiplication, division, square roots, exponents,
|
||||
percentages, logarithms, etc. NEVER attempt math in your head — always call this tool.
|
||||
Example: calculator("347 * 829") or calculator("math.sqrt(17161)")
|
||||
- **web_search** — Current events, real-time data, news
|
||||
- **read_file** — User explicitly requests file reading
|
||||
- **write_file** — User explicitly requests saving content
|
||||
- **python** — Complex calculations, code execution
|
||||
- **python** — Code execution, data processing (NOT for simple arithmetic — use calculator)
|
||||
- **shell** — System operations (explicit user request)
|
||||
- **memory_search** — "Have we talked about this before?", finding past context
|
||||
|
||||
@@ -74,6 +81,9 @@ user's digital sovereignty.
|
||||
- Never narrate your reasoning process. Just give the answer.
|
||||
- Never show raw tool call JSON or function syntax in responses.
|
||||
- Use the user's name if known.
|
||||
- When you state a fact, commit to it. Never contradict a correct statement you
|
||||
just made in the same response. If uncertain, express uncertainty at the start —
|
||||
never state something confidently and then immediately undermine it.
|
||||
|
||||
Sir, affirmative."""
|
||||
|
||||
@@ -101,12 +111,13 @@ you are operational and running locally."""
|
||||
TOOL_USAGE_GUIDE = """
|
||||
DECISION ORDER:
|
||||
|
||||
1. Can I answer from training data? → Answer directly (NO TOOL)
|
||||
2. Is this about past conversations? → memory_search
|
||||
3. Is this current/real-time info? → web_search
|
||||
4. Did user request file operations? → file tools
|
||||
5. Requires calculation/code? → python
|
||||
6. System command requested? → shell
|
||||
1. Is this arithmetic or math? → calculator (ALWAYS — never compute in your head)
|
||||
2. Can I answer from training data? → Answer directly (NO TOOL)
|
||||
3. Is this about past conversations? → memory_search
|
||||
4. Is this current/real-time info? → web_search
|
||||
5. Did user request file operations? → file tools
|
||||
6. Requires code execution? → python
|
||||
7. System command requested? → shell
|
||||
|
||||
MEMORY SEARCH TRIGGERS:
|
||||
- "Have we discussed..."
|
||||
|
||||
@@ -32,7 +32,7 @@ _TOOL_CALL_JSON = re.compile(
|
||||
|
||||
# Matches function-call-style text: memory_search(query="...") etc.
|
||||
_FUNC_CALL_TEXT = re.compile(
|
||||
r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files)'
|
||||
r'\b(?:memory_search|web_search|shell|python|read_file|write_file|list_files|calculator)'
|
||||
r'\s*\([^)]*\)',
|
||||
)
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ Tools are assigned to personas based on their specialties:
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
@@ -107,6 +108,33 @@ def get_tool_stats(agent_id: str | None = None) -> dict:
|
||||
return all_stats
|
||||
|
||||
|
||||
def calculator(expression: str) -> str:
    """Evaluate a mathematical expression and return the result as a string.

    Use this tool for ANY arithmetic: multiplication, division, square roots,
    exponents, percentages, logarithms, trigonometry, etc.

    The expression is parsed first and rejected if it contains any attribute
    access other than ``math.<public name>``. Emptying ``__builtins__`` alone
    is not a sandbox: without this check, input such as
    ``().__class__.__base__.__subclasses__()`` reaches arbitrary classes.

    Args:
        expression: A valid Python math expression, e.g. '347 * 829',
            'math.sqrt(17161)', '2**10', 'math.log(100, 10)'. The public
            names of the ``math`` module are also available unqualified
            (e.g. 'sqrt(16)'), together with abs, round, min and max.

    Returns:
        ``str(result)`` on success, otherwise a message of the form
        "Error evaluating '<expression>': <reason>". This function never
        raises for a bad expression.
    """
    import ast  # local import so the block does not depend on file-level imports

    # Only expose math functions — no builtins, no file/os access
    allowed_names = {
        name: getattr(math, name) for name in dir(math) if not name.startswith("_")
    }
    allowed_names["math"] = math  # Support math.sqrt(), math.pi, etc.
    allowed_names.update({"abs": abs, "round": round, "min": min, "max": max})

    try:
        # eval() tolerates leading spaces/tabs but ast.parse() does not, so
        # strip them to keep accepting the same inputs as before.
        source = expression.lstrip(" \t") if isinstance(expression, str) else expression
        tree = ast.parse(source, mode="eval")

        for node in ast.walk(tree):
            if not isinstance(node, ast.Attribute):
                continue
            owner = node.value
            is_public_math_attr = (
                isinstance(owner, ast.Name)
                and owner.id == "math"
                and not node.attr.startswith("_")
                and hasattr(math, node.attr)
            )
            if not is_public_math_attr:
                raise ValueError(f"attribute access '.{node.attr}' is not allowed")

        # Evaluate the tree that was validated, not the raw text.
        code = compile(tree, "<calculator>", "eval")
        # NOTE(security): eval() is still used here. Attribute-based escapes
        # are blocked above, but an expression such as 9**9**9 can still
        # exhaust CPU or memory; callers handling hostile input should add
        # their own time/size limits.
        result = eval(code, {"__builtins__": {}}, allowed_names)  # noqa: S307
        return str(result)
    except Exception as e:
        # Tool boundary: report every failure back to the caller as text.
        return f"Error evaluating '{expression}': {e}"
|
||||
|
||||
|
||||
def create_research_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for research personas (Echo).
|
||||
|
||||
@@ -280,13 +308,16 @@ def create_full_toolkit(base_dir: str | Path | None = None):
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
# Calculator — exact arithmetic (never let the LLM guess)
|
||||
toolkit.register(calculator, name="calculator")
|
||||
|
||||
# Memory search - semantic recall
|
||||
try:
|
||||
from timmy.semantic_memory import memory_search
|
||||
toolkit.register(memory_search, name="memory_search")
|
||||
except Exception:
|
||||
logger.debug("Memory search not available")
|
||||
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
@@ -371,6 +402,11 @@ def get_all_available_tools() -> dict[str, dict]:
|
||||
"description": "List files in a directory",
|
||||
"available_in": ["echo", "seer", "forge", "quill", "mace", "helm", "timmy"],
|
||||
},
|
||||
"calculator": {
|
||||
"name": "Calculator",
|
||||
"description": "Evaluate mathematical expressions with exact results",
|
||||
"available_in": ["timmy"],
|
||||
},
|
||||
}
|
||||
|
||||
# ── Git tools ─────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -233,11 +233,35 @@ def test_call_agent_falls_back_on_exception(engine):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_notify_briefing_ready_logs(caplog):
|
||||
"""notify_briefing_ready should log and call notifier.notify."""
|
||||
from notifications.push import notify_briefing_ready, PushNotifier
|
||||
async def test_notify_briefing_ready_skips_when_no_approvals(caplog):
|
||||
"""notify_briefing_ready should NOT fire native notification with 0 approvals."""
|
||||
from notifications.push import notify_briefing_ready
|
||||
|
||||
b = _make_briefing() # approval_items=[]
|
||||
|
||||
with patch("notifications.push.notifier") as mock_notifier:
|
||||
await notify_briefing_ready(b)
|
||||
mock_notifier.notify.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_notify_briefing_ready_fires_when_approvals_exist():
|
||||
"""notify_briefing_ready should fire when there are pending approval items."""
|
||||
from notifications.push import notify_briefing_ready
|
||||
from timmy.briefing import ApprovalItem
|
||||
|
||||
b = _make_briefing()
|
||||
b.approval_items = [
|
||||
ApprovalItem(
|
||||
id="test-1",
|
||||
title="Test approval",
|
||||
description="A test item",
|
||||
proposed_action="do something",
|
||||
impact="low",
|
||||
created_at=datetime.now(timezone.utc),
|
||||
status="pending",
|
||||
),
|
||||
]
|
||||
|
||||
with patch("notifications.push.notifier") as mock_notifier:
|
||||
await notify_briefing_ready(b)
|
||||
|
||||
85
tests/test_calculator.py
Normal file
85
tests/test_calculator.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Tests for the calculator tool."""
|
||||
|
||||
from timmy.tools import calculator
|
||||
|
||||
|
||||
def test_basic_multiplication():
    """Integer multiplication comes back as an exact integer string."""
    product = calculator("347 * 829")
    assert product == "287663"
|
||||
|
||||
|
||||
def test_basic_addition():
    """Integer addition comes back as an exact integer string."""
    total = calculator("100 + 200")
    assert total == "300"
|
||||
|
||||
|
||||
def test_basic_division():
    """True division yields a float, rendered with a trailing '.0'."""
    quotient = calculator("100 / 4")
    assert quotient == "25.0"
|
||||
|
||||
|
||||
def test_integer_division():
    """Floor division yields an integer string."""
    quotient = calculator("100 // 3")
    assert quotient == "33"
|
||||
|
||||
|
||||
def test_exponentiation():
    """The ** operator is supported for integer powers."""
    power = calculator("2 ** 10")
    assert power == "1024"
|
||||
|
||||
|
||||
def test_sqrt():
    """math.sqrt of a perfect square is returned as a float string."""
    root = calculator("math.sqrt(17161)")
    assert root == "131.0"
|
||||
|
||||
|
||||
def test_sqrt_non_perfect():
    """math.sqrt(2) parses back to a float within 1e-10 of the known value."""
    expected = 1.4142135623730951
    actual = float(calculator("math.sqrt(2)"))
    assert abs(actual - expected) < 1e-10
|
||||
|
||||
|
||||
def test_log_base_10():
    """math.log10(1000) evaluates to 3.0 within 1e-10."""
    expected = 3.0
    actual = float(calculator("math.log10(1000)"))
    assert abs(actual - expected) < 1e-10
|
||||
|
||||
|
||||
def test_log_natural():
    """The natural log of math.e evaluates to 1.0 within 1e-10."""
    expected = 1.0
    actual = float(calculator("math.log(math.e)"))
    assert abs(actual - expected) < 1e-10
|
||||
|
||||
|
||||
def test_trig_sin():
    """math.sin(math.pi / 2) evaluates to 1.0 within 1e-10."""
    expected = 1.0
    actual = float(calculator("math.sin(math.pi / 2)"))
    assert abs(actual - expected) < 1e-10
|
||||
|
||||
|
||||
def test_abs_builtin():
    """The abs builtin is exposed to expressions."""
    magnitude = calculator("abs(-42)")
    assert magnitude == "42"
|
||||
|
||||
|
||||
def test_round_builtin():
    """The round builtin is exposed, including its ndigits argument."""
    rounded = calculator("round(3.14159, 2)")
    assert rounded == "3.14"
|
||||
|
||||
|
||||
def test_min_max_builtins():
    """The min and max builtins are exposed to expressions."""
    smallest = calculator("min(3, 7, 1)")
    assert smallest == "1"
    largest = calculator("max(3, 7, 1)")
    assert largest == "7"
|
||||
|
||||
|
||||
def test_complex_expression():
    """Parenthesised sub-expressions combine; the float operand makes the result a float."""
    combined = calculator("(347 * 829) + (100 / 4)")
    assert combined == "287688.0"
|
||||
|
||||
|
||||
def test_invalid_expression_returns_error():
    """Unparseable input is reported as an error string instead of raising."""
    outcome = calculator("not a valid expression")
    assert outcome.startswith("Error evaluating")
|
||||
|
||||
|
||||
def test_no_builtins_access():
    """Ensure dangerous builtins like __import__ are blocked."""
    outcome = calculator("__import__('os').system('echo pwned')")
    assert outcome.startswith("Error evaluating")
|
||||
|
||||
|
||||
def test_no_open_access():
    """An expression that tries to call open() is reported as an error string."""
    outcome = calculator("open('/etc/passwd').read()")
    assert outcome.startswith("Error evaluating")
|
||||
|
||||
|
||||
def test_division_by_zero():
    """Division by zero is reported as an error string instead of raising."""
    outcome = calculator("1 / 0")
    assert outcome.startswith("Error evaluating")
|
||||
Reference in New Issue
Block a user