Compare commits

..

4 Commits

Author SHA1 Message Date
kimi
75a6a498b4 fix: use word-boundary regex for sensitive pattern matching to avoid false positives on max_tokens
The _SENSITIVE_PATTERNS list used simple substring matching, so "token"
matched "max_tokens", causing the distillation pipeline to block facts
about max_tokens parameters. Replaced with compiled regexes using
lookaround assertions so compound terms like max_tokens and num_tokens
are no longer falsely flagged.

Fixes #625

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-20 16:35:37 -04:00
84302aedac fix: pass max_tokens to Ollama provider in cascade router (#622)
All checks were successful
Tests / lint (push) Successful in 3s
Tests / test (push) Successful in 1m11s
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-20 16:27:24 -04:00
2c217104db feat: real-time Spark visualization in Mission Control (#615)
All checks were successful
Tests / lint (push) Successful in 2s
Tests / test (push) Successful in 1m19s
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-20 16:22:15 -04:00
7452e8a4f0 fix: add missing tests for Tower route /tower (#621)
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-20 16:22:13 -04:00
5 changed files with 318 additions and 17 deletions

View File

@@ -138,6 +138,47 @@
</div>
</div>
<!-- Spark Intelligence -->
{% from "macros.html" import panel %}
<div class="mc-card-spaced">
<div class="card">
<div class="card-header">
<h2 class="card-title">Spark Intelligence</h2>
<div>
<span class="badge" id="spark-status-badge">Loading...</span>
</div>
</div>
<div class="grid grid-3">
<div class="stat">
<div class="stat-value" id="spark-events">-</div>
<div class="stat-label">Events</div>
</div>
<div class="stat">
<div class="stat-value" id="spark-memories">-</div>
<div class="stat-label">Memories</div>
</div>
<div class="stat">
<div class="stat-value" id="spark-predictions">-</div>
<div class="stat-label">Predictions</div>
</div>
</div>
</div>
<div class="grid grid-2 mc-section-gap">
{% call panel("SPARK TIMELINE", id="spark-timeline-panel",
hx_get="/spark/timeline",
hx_trigger="load, every 10s") %}
<div class="spark-timeline-scroll">
<p class="chat-history-placeholder">Loading timeline...</p>
</div>
{% endcall %}
{% call panel("SPARK INSIGHTS", id="spark-insights-panel",
hx_get="/spark/insights",
hx_trigger="load, every 30s") %}
<p class="chat-history-placeholder">Loading insights...</p>
{% endcall %}
</div>
</div>
<!-- Chat History -->
<div class="card mc-card-spaced">
<div class="card-header">
@@ -428,7 +469,34 @@ async function loadGrokStats() {
}
}
// Load Spark status
async function loadSparkStatus() {
try {
var response = await fetch('/spark');
var data = await response.json();
var st = data.status || {};
document.getElementById('spark-events').textContent = st.total_events || 0;
document.getElementById('spark-memories').textContent = st.total_memories || 0;
document.getElementById('spark-predictions').textContent = st.total_predictions || 0;
var badge = document.getElementById('spark-status-badge');
if (st.total_events > 0) {
badge.textContent = 'Active';
badge.className = 'badge badge-success';
} else {
badge.textContent = 'Idle';
badge.className = 'badge badge-warning';
}
} catch (error) {
var badge = document.getElementById('spark-status-badge');
badge.textContent = 'Offline';
badge.className = 'badge badge-danger';
}
}
// Initial load
loadSparkStatus();
loadSovereignty();
loadHealth();
loadSwarmStats();
@@ -442,5 +510,6 @@ setInterval(loadHealth, 10000);
setInterval(loadSwarmStats, 5000);
setInterval(updateHeartbeat, 5000);
setInterval(loadGrokStats, 10000);
setInterval(loadSparkStatus, 15000);
</script>
{% endblock %}

View File

@@ -564,6 +564,7 @@ class CascadeRouter:
messages=messages,
model=model or provider.get_default_model(),
temperature=temperature,
max_tokens=max_tokens,
content_type=content_type,
)
elif provider.type == "openai":
@@ -604,6 +605,7 @@ class CascadeRouter:
messages: list[dict],
model: str,
temperature: float,
max_tokens: int | None = None,
content_type: ContentType = ContentType.TEXT,
) -> dict:
"""Call Ollama API with multi-modal support."""
@@ -614,13 +616,15 @@ class CascadeRouter:
# Transform messages for Ollama format (including images)
transformed_messages = self._transform_messages_for_ollama(messages)
options = {"temperature": temperature}
if max_tokens:
options["num_predict"] = max_tokens
payload = {
"model": model,
"messages": transformed_messages,
"stream": False,
"options": {
"temperature": temperature,
},
"options": options,
}
timeout = aiohttp.ClientTimeout(total=self.config.timeout_seconds)

View File

@@ -39,19 +39,21 @@ _DEFAULT_DB = Path("data/thoughts.db")
# qwen3 and other reasoning models wrap chain-of-thought in <think> tags
_THINK_TAG_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
# Sensitive patterns that must never be stored as facts
_SENSITIVE_PATTERNS = [
"token",
"password",
"secret",
"api_key",
"apikey",
"credential",
".config/",
"/token",
"access_token",
"private_key",
"ssh_key",
# Sensitive patterns that must never be stored as facts.
# Uses compiled regexes with word boundaries so that compound technical
# terms like "max_tokens" or "num_tokens" are NOT falsely flagged.
_SENSITIVE_RE = [
re.compile(r"(?<![a-z_])token(?![a-z_])", re.IGNORECASE), # "token" but not "max_tokens"
re.compile(r"password", re.IGNORECASE),
re.compile(r"secret", re.IGNORECASE),
re.compile(r"api_key", re.IGNORECASE),
re.compile(r"apikey", re.IGNORECASE),
re.compile(r"credential", re.IGNORECASE),
re.compile(r"\.config/"),
re.compile(r"/token\b"),
re.compile(r"access_token", re.IGNORECASE),
re.compile(r"private_key", re.IGNORECASE),
re.compile(r"ssh_key", re.IGNORECASE),
]
# Meta-observation phrases to filter out from distilled facts
@@ -548,7 +550,7 @@ class ThinkingEngine:
fact_lower = fact.lower()
# Block sensitive information
if any(pat in fact_lower for pat in _SENSITIVE_PATTERNS):
if any(pat.search(fact) for pat in _SENSITIVE_RE):
logger.warning("Distill: blocked sensitive fact: %s", fact[:60])
continue

View File

@@ -0,0 +1,187 @@
"""Tests for Tower dashboard route (/tower)."""
from unittest.mock import MagicMock, patch
def _mock_spark_engine():
"""Return a mock spark_engine with realistic return values."""
engine = MagicMock()
engine.status.return_value = {
"enabled": True,
"events_captured": 5,
"memories_stored": 3,
"predictions": {"total": 2, "avg_accuracy": 0.85},
"event_types": {
"task_posted": 2,
"bid_submitted": 1,
"task_assigned": 1,
"task_completed": 1,
"task_failed": 0,
"agent_joined": 0,
"tool_executed": 0,
"creative_step": 0,
},
}
event = MagicMock()
event.event_type = "task_completed"
event.description = "Task finished"
event.importance = 0.8
event.created_at = "2026-01-01T00:00:00"
event.agent_id = "agent-1234-abcd"
event.task_id = "task-5678-efgh"
event.data = '{"result": "ok"}'
engine.get_timeline.return_value = [event]
pred = MagicMock()
pred.task_id = "task-5678-efgh"
pred.accuracy = 0.9
pred.evaluated_at = "2026-01-01T01:00:00"
pred.created_at = "2026-01-01T00:30:00"
pred.predicted_value = '{"outcome": "success"}'
engine.get_predictions.return_value = [pred]
advisory = MagicMock()
advisory.category = "performance"
advisory.priority = "high"
advisory.title = "Slow tasks"
advisory.detail = "Tasks taking longer than expected"
advisory.suggested_action = "Scale up workers"
engine.get_advisories.return_value = [advisory]
return engine
class TestTowerUI:
"""Tests for GET /tower endpoint."""
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_tower_returns_200(self, mock_engine, client):
response = client.get("/tower")
assert response.status_code == 200
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_tower_returns_html(self, mock_engine, client):
response = client.get("/tower")
assert "text/html" in response.headers["content-type"]
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_tower_contains_dashboard_content(self, mock_engine, client):
response = client.get("/tower")
body = response.text
assert "tower" in body.lower() or "spark" in body.lower()
class TestSparkSnapshot:
"""Tests for _spark_snapshot helper."""
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_snapshot_structure(self, mock_engine):
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
assert snap["type"] == "spark_state"
assert "status" in snap
assert "events" in snap
assert "predictions" in snap
assert "advisories" in snap
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_snapshot_events_parsed(self, mock_engine):
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
ev = snap["events"][0]
assert ev["event_type"] == "task_completed"
assert ev["importance"] == 0.8
assert ev["agent_id"] == "agent-12"
assert ev["task_id"] == "task-567"
assert ev["data"] == {"result": "ok"}
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_snapshot_predictions_parsed(self, mock_engine):
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
pred = snap["predictions"][0]
assert pred["task_id"] == "task-567"
assert pred["accuracy"] == 0.9
assert pred["evaluated"] is True
assert pred["predicted"] == {"outcome": "success"}
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
def test_snapshot_advisories_parsed(self, mock_engine):
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
adv = snap["advisories"][0]
assert adv["category"] == "performance"
assert adv["priority"] == "high"
assert adv["title"] == "Slow tasks"
assert adv["suggested_action"] == "Scale up workers"
@patch("dashboard.routes.tower.spark_engine")
def test_snapshot_handles_empty_state(self, mock_engine):
mock_engine.status.return_value = {"enabled": False}
mock_engine.get_timeline.return_value = []
mock_engine.get_predictions.return_value = []
mock_engine.get_advisories.return_value = []
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
assert snap["events"] == []
assert snap["predictions"] == []
assert snap["advisories"] == []
@patch("dashboard.routes.tower.spark_engine")
def test_snapshot_handles_invalid_json_data(self, mock_engine):
mock_engine.status.return_value = {"enabled": True}
event = MagicMock()
event.event_type = "test"
event.description = "bad data"
event.importance = 0.5
event.created_at = "2026-01-01T00:00:00"
event.agent_id = None
event.task_id = None
event.data = "not-json{"
mock_engine.get_timeline.return_value = [event]
pred = MagicMock()
pred.task_id = None
pred.accuracy = None
pred.evaluated_at = None
pred.created_at = "2026-01-01T00:00:00"
pred.predicted_value = None
mock_engine.get_predictions.return_value = [pred]
mock_engine.get_advisories.return_value = []
from dashboard.routes.tower import _spark_snapshot
snap = _spark_snapshot()
ev = snap["events"][0]
assert ev["data"] == {}
assert "agent_id" not in ev
assert "task_id" not in ev
pred = snap["predictions"][0]
assert pred["task_id"] == "?"
assert pred["predicted"] == {}
class TestTowerWebSocket:
"""Tests for WS /tower/ws endpoint."""
@patch("dashboard.routes.tower.spark_engine", new_callable=_mock_spark_engine)
@patch("dashboard.routes.tower._PUSH_INTERVAL", 0)
def test_ws_sends_initial_snapshot(self, mock_engine, client):
import json
with client.websocket_connect("/tower/ws") as ws:
data = json.loads(ws.receive_text())
assert data["type"] == "spark_state"
assert "status" in data
assert "events" in data

View File

@@ -1188,3 +1188,42 @@ def test_references_real_files_blocks_mixed(tmp_path):
# Mix of real and fake files — should fail because of the fake one
text = "Fix src/timmy/thinking.py and also src/timmy/nonexistent_module.py for the memory leak."
assert ThinkingEngine._references_real_files(text) is False
# ---------------------------------------------------------------------------
# Sensitive-pattern regression: max_tokens must NOT be flagged (#625)
# ---------------------------------------------------------------------------
def test_sensitive_patterns_allow_max_tokens():
"""_SENSITIVE_RE should not flag 'max_tokens' as sensitive (#625)."""
from timmy.thinking import _SENSITIVE_RE
safe_facts = [
"The cascade router passes max_tokens to Ollama provider.",
"max_tokens=request.max_tokens in the completion call.",
"num_tokens defaults to 2048.",
"total_prompt_tokens is tracked in stats.",
]
for fact in safe_facts:
assert not any(pat.search(fact) for pat in _SENSITIVE_RE), (
f"False positive: {fact!r} was flagged as sensitive"
)
def test_sensitive_patterns_still_block_real_secrets():
"""_SENSITIVE_RE should still block actual secrets."""
from timmy.thinking import _SENSITIVE_RE
dangerous_facts = [
"The token is abc123def456.",
"Set password to hunter2.",
"api_key = sk-live-xyz",
"Found credential in .env file.",
"access_token expired yesterday.",
"private_key stored in vault.",
]
for fact in dangerous_facts:
assert any(pat.search(fact) for pat in _SENSITIVE_RE), (
f"Missed secret: {fact!r} was NOT flagged as sensitive"
)