Merge branch 'main' into refactor/151-maybe-distill
All checks were successful
Tests / lint (pull_request) Successful in 3s
Tests / test (pull_request) Successful in 1m23s

This commit is contained in:
2026-03-15 11:23:36 -04:00
17 changed files with 773 additions and 543 deletions

View File

@@ -3,6 +3,7 @@
import asyncio
import logging
import sqlite3
from contextlib import closing
from pathlib import Path
from fastapi import APIRouter, Request
@@ -39,56 +40,50 @@ def _query_database(db_path: str) -> dict:
"""Open a database read-only and return all tables with their rows."""
result = {"tables": {}, "error": None}
try:
conn = sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)
conn.row_factory = sqlite3.Row
except Exception as exc:
result["error"] = str(exc)
return result
with closing(sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)) as conn:
conn.row_factory = sqlite3.Row
try:
tables = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
).fetchall()
for (table_name,) in tables:
try:
rows = conn.execute(
f"SELECT * FROM [{table_name}] LIMIT {MAX_ROWS}" # noqa: S608
).fetchall()
columns = (
[
desc[0]
for desc in conn.execute(
f"SELECT * FROM [{table_name}] LIMIT 0"
).description
]
if rows
else []
) # noqa: S608
if not columns and rows:
columns = list(rows[0].keys())
elif not columns:
# Get columns even for empty tables
cursor = conn.execute(f"PRAGMA table_info([{table_name}])") # noqa: S608
columns = [r[1] for r in cursor.fetchall()]
count = conn.execute(f"SELECT COUNT(*) FROM [{table_name}]").fetchone()[0] # noqa: S608
result["tables"][table_name] = {
"columns": columns,
"rows": [dict(r) for r in rows],
"total_count": count,
"truncated": count > MAX_ROWS,
}
except Exception as exc:
result["tables"][table_name] = {
"error": str(exc),
"columns": [],
"rows": [],
"total_count": 0,
"truncated": False,
}
tables = conn.execute(
"SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
).fetchall()
for (table_name,) in tables:
try:
rows = conn.execute(
f"SELECT * FROM [{table_name}] LIMIT {MAX_ROWS}" # noqa: S608
).fetchall()
columns = (
[
desc[0]
for desc in conn.execute(
f"SELECT * FROM [{table_name}] LIMIT 0"
).description
]
if rows
else []
) # noqa: S608
if not columns and rows:
columns = list(rows[0].keys())
elif not columns:
# Get columns even for empty tables
cursor = conn.execute(f"PRAGMA table_info([{table_name}])") # noqa: S608
columns = [r[1] for r in cursor.fetchall()]
count = conn.execute(f"SELECT COUNT(*) FROM [{table_name}]").fetchone()[0] # noqa: S608
result["tables"][table_name] = {
"columns": columns,
"rows": [dict(r) for r in rows],
"total_count": count,
"truncated": count > MAX_ROWS,
}
except Exception as exc:
result["tables"][table_name] = {
"error": str(exc),
"columns": [],
"rows": [],
"total_count": 0,
"truncated": False,
}
except Exception as exc:
result["error"] = str(exc)
finally:
conn.close()
return result

View File

@@ -6,8 +6,11 @@ for the Mission Control dashboard.
import asyncio
import logging
import sqlite3
import time
from contextlib import closing
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
from fastapi import APIRouter, Request
@@ -134,13 +137,9 @@ def _check_lightning() -> DependencyStatus:
def _check_sqlite() -> DependencyStatus:
"""Check SQLite database status."""
try:
import sqlite3
from pathlib import Path
db_path = Path(settings.repo_root) / "data" / "timmy.db"
conn = sqlite3.connect(str(db_path))
conn.execute("SELECT 1")
conn.close()
with closing(sqlite3.connect(str(db_path))) as conn:
conn.execute("SELECT 1")
return DependencyStatus(
name="SQLite Database",

View File

@@ -3,6 +3,7 @@
import logging
import sqlite3
import uuid
from contextlib import closing
from datetime import datetime
from pathlib import Path
@@ -101,8 +102,7 @@ class _TaskView:
@router.get("/tasks", response_class=HTMLResponse)
async def tasks_page(request: Request):
"""Render the main task queue page with 3-column layout."""
db = _get_db()
try:
with closing(_get_db()) as db:
pending = [
_TaskView(_row_to_dict(r))
for r in db.execute(
@@ -121,8 +121,6 @@ async def tasks_page(request: Request):
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50"
).fetchall()
]
finally:
db.close()
return templates.TemplateResponse(
request,
@@ -145,13 +143,10 @@ async def tasks_page(request: Request):
@router.get("/tasks/pending", response_class=HTMLResponse)
async def tasks_pending(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
rows = db.execute(
"SELECT * FROM tasks WHERE status='pending_approval' ORDER BY created_at DESC"
).fetchall()
finally:
db.close()
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
@@ -167,13 +162,10 @@ async def tasks_pending(request: Request):
@router.get("/tasks/active", response_class=HTMLResponse)
async def tasks_active(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
rows = db.execute(
"SELECT * FROM tasks WHERE status IN ('approved','running','paused') ORDER BY created_at DESC"
).fetchall()
finally:
db.close()
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
@@ -189,13 +181,10 @@ async def tasks_active(request: Request):
@router.get("/tasks/completed", response_class=HTMLResponse)
async def tasks_completed(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
rows = db.execute(
"SELECT * FROM tasks WHERE status IN ('completed','vetoed','failed') ORDER BY completed_at DESC LIMIT 50"
).fetchall()
finally:
db.close()
tasks = [_TaskView(_row_to_dict(r)) for r in rows]
parts = []
for task in tasks:
@@ -231,16 +220,13 @@ async def create_task_form(
now = datetime.utcnow().isoformat()
priority = priority if priority in VALID_PRIORITIES else "normal"
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"INSERT INTO tasks (id, title, description, priority, assigned_to, created_at) VALUES (?, ?, ?, ?, ?, ?)",
(task_id, title, description, priority, assigned_to, now),
)
db.commit()
row = db.execute("SELECT * FROM tasks WHERE id=?", (task_id,)).fetchone()
finally:
db.close()
task = _TaskView(_row_to_dict(row))
return templates.TemplateResponse(request, "partials/task_card.html", {"task": task})
@@ -283,16 +269,13 @@ async def modify_task(
title: str = Form(...),
description: str = Form(""),
):
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"UPDATE tasks SET title=?, description=? WHERE id=?",
(title, description, task_id),
)
db.commit()
row = db.execute("SELECT * FROM tasks WHERE id=?", (task_id,)).fetchone()
finally:
db.close()
if not row:
raise HTTPException(404, "Task not found")
task = _TaskView(_row_to_dict(row))
@@ -304,16 +287,13 @@ async def _set_status(request: Request, task_id: str, new_status: str):
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
)
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"UPDATE tasks SET status=?, completed_at=COALESCE(?, completed_at) WHERE id=?",
(new_status, completed_at, task_id),
)
db.commit()
row = db.execute("SELECT * FROM tasks WHERE id=?", (task_id,)).fetchone()
finally:
db.close()
if not row:
raise HTTPException(404, "Task not found")
task = _TaskView(_row_to_dict(row))
@@ -339,8 +319,7 @@ async def api_create_task(request: Request):
if priority not in VALID_PRIORITIES:
priority = "normal"
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"INSERT INTO tasks (id, title, description, priority, assigned_to, created_by, created_at) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
@@ -356,8 +335,6 @@ async def api_create_task(request: Request):
)
db.commit()
row = db.execute("SELECT * FROM tasks WHERE id=?", (task_id,)).fetchone()
finally:
db.close()
return JSONResponse(_row_to_dict(row), status_code=201)
@@ -365,11 +342,8 @@ async def api_create_task(request: Request):
@router.get("/api/tasks", response_class=JSONResponse)
async def api_list_tasks():
"""List all tasks as JSON."""
db = _get_db()
try:
with closing(_get_db()) as db:
rows = db.execute("SELECT * FROM tasks ORDER BY created_at DESC").fetchall()
finally:
db.close()
return JSONResponse([_row_to_dict(r) for r in rows])
@@ -384,16 +358,13 @@ async def api_update_status(task_id: str, request: Request):
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "vetoed", "failed") else None
)
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"UPDATE tasks SET status=?, completed_at=COALESCE(?, completed_at) WHERE id=?",
(new_status, completed_at, task_id),
)
db.commit()
row = db.execute("SELECT * FROM tasks WHERE id=?", (task_id,)).fetchone()
finally:
db.close()
if not row:
raise HTTPException(404, "Task not found")
return JSONResponse(_row_to_dict(row))
@@ -402,12 +373,9 @@ async def api_update_status(task_id: str, request: Request):
@router.delete("/api/tasks/{task_id}", response_class=JSONResponse)
async def api_delete_task(task_id: str):
"""Delete a task."""
db = _get_db()
try:
with closing(_get_db()) as db:
cursor = db.execute("DELETE FROM tasks WHERE id=?", (task_id,))
db.commit()
finally:
db.close()
if cursor.rowcount == 0:
raise HTTPException(404, "Task not found")
return JSONResponse({"success": True, "id": task_id})
@@ -421,8 +389,7 @@ async def api_delete_task(task_id: str):
@router.get("/api/queue/status", response_class=JSONResponse)
async def queue_status(assigned_to: str = "default"):
"""Return queue status for the chat panel's agent status indicator."""
db = _get_db()
try:
with closing(_get_db()) as db:
running = db.execute(
"SELECT * FROM tasks WHERE status='running' AND assigned_to=? LIMIT 1",
(assigned_to,),
@@ -431,8 +398,6 @@ async def queue_status(assigned_to: str = "default"):
"SELECT COUNT(*) as cnt FROM tasks WHERE status IN ('pending_approval','approved') AND assigned_to=?",
(assigned_to,),
).fetchone()
finally:
db.close()
if running:
return JSONResponse(

View File

@@ -3,6 +3,7 @@
import logging
import sqlite3
import uuid
from contextlib import closing
from datetime import datetime
from pathlib import Path
@@ -104,14 +105,11 @@ def _query_wos(db, statuses):
@router.get("/work-orders/queue", response_class=HTMLResponse)
async def work_orders_page(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
pending = _query_wos(db, ["submitted", "triaged"])
active = _query_wos(db, ["approved", "in_progress"])
completed = _query_wos(db, ["completed"])
rejected = _query_wos(db, ["rejected"])
finally:
db.close()
return templates.TemplateResponse(
request,
@@ -148,8 +146,7 @@ async def submit_work_order(
priority = priority if priority in PRIORITIES else "medium"
category = category if category in CATEGORIES else "suggestion"
db = _get_db()
try:
with closing(_get_db()) as db:
db.execute(
"INSERT INTO work_orders (id, title, description, priority, category, submitter, related_files, created_at) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
@@ -157,8 +154,6 @@ async def submit_work_order(
)
db.commit()
row = db.execute("SELECT * FROM work_orders WHERE id=?", (wo_id,)).fetchone()
finally:
db.close()
wo = _WOView(_row_to_dict(row))
return templates.TemplateResponse(request, "partials/work_order_card.html", {"wo": wo})
@@ -171,11 +166,8 @@ async def submit_work_order(
@router.get("/work-orders/queue/pending", response_class=HTMLResponse)
async def pending_partial(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
wos = _query_wos(db, ["submitted", "triaged"])
finally:
db.close()
if not wos:
return HTMLResponse(
'<div style="color: var(--text-muted); font-size: 0.8rem; padding: 12px 0;">'
@@ -193,11 +185,8 @@ async def pending_partial(request: Request):
@router.get("/work-orders/queue/active", response_class=HTMLResponse)
async def active_partial(request: Request):
db = _get_db()
try:
with closing(_get_db()) as db:
wos = _query_wos(db, ["approved", "in_progress"])
finally:
db.close()
if not wos:
return HTMLResponse(
'<div style="color: var(--text-muted); font-size: 0.8rem; padding: 12px 0;">'
@@ -222,8 +211,7 @@ async def _update_status(request: Request, wo_id: str, new_status: str, **extra)
completed_at = (
datetime.utcnow().isoformat() if new_status in ("completed", "rejected") else None
)
db = _get_db()
try:
with closing(_get_db()) as db:
sets = ["status=?", "completed_at=COALESCE(?, completed_at)"]
vals = [new_status, completed_at]
for col, val in extra.items():
@@ -233,8 +221,6 @@ async def _update_status(request: Request, wo_id: str, new_status: str, **extra)
db.execute(f"UPDATE work_orders SET {', '.join(sets)} WHERE id=?", vals)
db.commit()
row = db.execute("SELECT * FROM work_orders WHERE id=?", (wo_id,)).fetchone()
finally:
db.close()
if not row:
raise HTTPException(404, "Work order not found")
wo = _WOView(_row_to_dict(row))

View File

@@ -10,6 +10,7 @@ import json
import logging
import sqlite3
from collections.abc import Callable, Coroutine
from contextlib import closing
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
@@ -99,14 +100,11 @@ class EventBus:
if self._persistence_db_path is None:
return
self._persistence_db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(self._persistence_db_path))
try:
with closing(sqlite3.connect(str(self._persistence_db_path))) as conn:
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA busy_timeout=5000")
conn.executescript(_EVENTS_SCHEMA)
conn.commit()
finally:
conn.close()
def _get_persistence_conn(self) -> sqlite3.Connection | None:
"""Get a connection to the persistence database."""
@@ -123,27 +121,26 @@ class EventBus:
if conn is None:
return
try:
task_id = event.data.get("task_id", "")
agent_id = event.data.get("agent_id", "")
conn.execute(
"INSERT OR IGNORE INTO events "
"(id, event_type, source, task_id, agent_id, data, timestamp) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(
event.id,
event.type,
event.source,
task_id,
agent_id,
json.dumps(event.data),
event.timestamp,
),
)
conn.commit()
with closing(conn):
task_id = event.data.get("task_id", "")
agent_id = event.data.get("agent_id", "")
conn.execute(
"INSERT OR IGNORE INTO events "
"(id, event_type, source, task_id, agent_id, data, timestamp) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(
event.id,
event.type,
event.source,
task_id,
agent_id,
json.dumps(event.data),
event.timestamp,
),
)
conn.commit()
except Exception as exc:
logger.debug("Failed to persist event: %s", exc)
finally:
conn.close()
# ── Replay ───────────────────────────────────────────────────────────
@@ -170,40 +167,39 @@ class EventBus:
return []
try:
conditions = []
params: list = []
with closing(conn):
conditions = []
params: list = []
if event_type:
conditions.append("event_type = ?")
params.append(event_type)
if source:
conditions.append("source = ?")
params.append(source)
if task_id:
conditions.append("task_id = ?")
params.append(task_id)
if event_type:
conditions.append("event_type = ?")
params.append(event_type)
if source:
conditions.append("source = ?")
params.append(source)
if task_id:
conditions.append("task_id = ?")
params.append(task_id)
where = " AND ".join(conditions) if conditions else "1=1"
sql = f"SELECT * FROM events WHERE {where} ORDER BY timestamp DESC LIMIT ?"
params.append(limit)
where = " AND ".join(conditions) if conditions else "1=1"
sql = f"SELECT * FROM events WHERE {where} ORDER BY timestamp DESC LIMIT ?"
params.append(limit)
rows = conn.execute(sql, params).fetchall()
rows = conn.execute(sql, params).fetchall()
return [
Event(
id=row["id"],
type=row["event_type"],
source=row["source"],
data=json.loads(row["data"]) if row["data"] else {},
timestamp=row["timestamp"],
)
for row in rows
]
return [
Event(
id=row["id"],
type=row["event_type"],
source=row["source"],
data=json.loads(row["data"]) if row["data"] else {},
timestamp=row["timestamp"],
)
for row in rows
]
except Exception as exc:
logger.debug("Failed to replay events: %s", exc)
return []
finally:
conn.close()
# ── Subscribe / Publish ──────────────────────────────────────────────

View File

@@ -11,6 +11,7 @@ model roles (student, teacher, judge/PRM) run on dedicated resources.
import logging
import sqlite3
import threading
from contextlib import closing
from dataclasses import dataclass
from datetime import UTC, datetime
from enum import StrEnum
@@ -105,23 +106,22 @@ class ModelRegistry:
def _load_from_db(self) -> None:
"""Bootstrap cache from SQLite."""
try:
conn = _get_conn()
for row in conn.execute("SELECT * FROM custom_models WHERE active = 1").fetchall():
self._models[row["name"]] = CustomModel(
name=row["name"],
format=ModelFormat(row["format"]),
path=row["path"],
role=ModelRole(row["role"]),
context_window=row["context_window"],
description=row["description"],
registered_at=row["registered_at"],
active=bool(row["active"]),
default_temperature=row["default_temperature"],
max_tokens=row["max_tokens"],
)
for row in conn.execute("SELECT * FROM agent_model_assignments").fetchall():
self._agent_assignments[row["agent_id"]] = row["model_name"]
conn.close()
with closing(_get_conn()) as conn:
for row in conn.execute("SELECT * FROM custom_models WHERE active = 1").fetchall():
self._models[row["name"]] = CustomModel(
name=row["name"],
format=ModelFormat(row["format"]),
path=row["path"],
role=ModelRole(row["role"]),
context_window=row["context_window"],
description=row["description"],
registered_at=row["registered_at"],
active=bool(row["active"]),
default_temperature=row["default_temperature"],
max_tokens=row["max_tokens"],
)
for row in conn.execute("SELECT * FROM agent_model_assignments").fetchall():
self._agent_assignments[row["agent_id"]] = row["model_name"]
except Exception as exc:
logger.warning("Failed to load model registry from DB: %s", exc)
@@ -130,29 +130,28 @@ class ModelRegistry:
def register(self, model: CustomModel) -> CustomModel:
"""Register a new custom model."""
with self._lock:
conn = _get_conn()
conn.execute(
"""
INSERT OR REPLACE INTO custom_models
(name, format, path, role, context_window, description,
registered_at, active, default_temperature, max_tokens)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
model.name,
model.format.value,
model.path,
model.role.value,
model.context_window,
model.description,
model.registered_at,
int(model.active),
model.default_temperature,
model.max_tokens,
),
)
conn.commit()
conn.close()
with closing(_get_conn()) as conn:
conn.execute(
"""
INSERT OR REPLACE INTO custom_models
(name, format, path, role, context_window, description,
registered_at, active, default_temperature, max_tokens)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
model.name,
model.format.value,
model.path,
model.role.value,
model.context_window,
model.description,
model.registered_at,
int(model.active),
model.default_temperature,
model.max_tokens,
),
)
conn.commit()
self._models[model.name] = model
logger.info("Registered model: %s (%s)", model.name, model.format.value)
return model
@@ -162,11 +161,10 @@ class ModelRegistry:
with self._lock:
if name not in self._models:
return False
conn = _get_conn()
conn.execute("DELETE FROM custom_models WHERE name = ?", (name,))
conn.execute("DELETE FROM agent_model_assignments WHERE model_name = ?", (name,))
conn.commit()
conn.close()
with closing(_get_conn()) as conn:
conn.execute("DELETE FROM custom_models WHERE name = ?", (name,))
conn.execute("DELETE FROM agent_model_assignments WHERE model_name = ?", (name,))
conn.commit()
del self._models[name]
# Remove any agent assignments using this model
self._agent_assignments = {
@@ -193,13 +191,12 @@ class ModelRegistry:
return False
with self._lock:
model.active = active
conn = _get_conn()
conn.execute(
"UPDATE custom_models SET active = ? WHERE name = ?",
(int(active), name),
)
conn.commit()
conn.close()
with closing(_get_conn()) as conn:
conn.execute(
"UPDATE custom_models SET active = ? WHERE name = ?",
(int(active), name),
)
conn.commit()
return True
# ── Agent-model assignments ────────────────────────────────────────────
@@ -210,17 +207,16 @@ class ModelRegistry:
return False
with self._lock:
now = datetime.now(UTC).isoformat()
conn = _get_conn()
conn.execute(
"""
INSERT OR REPLACE INTO agent_model_assignments
(agent_id, model_name, assigned_at)
VALUES (?, ?, ?)
""",
(agent_id, model_name, now),
)
conn.commit()
conn.close()
with closing(_get_conn()) as conn:
conn.execute(
"""
INSERT OR REPLACE INTO agent_model_assignments
(agent_id, model_name, assigned_at)
VALUES (?, ?, ?)
""",
(agent_id, model_name, now),
)
conn.commit()
self._agent_assignments[agent_id] = model_name
logger.info("Assigned model %s to agent %s", model_name, agent_id)
return True
@@ -230,13 +226,12 @@ class ModelRegistry:
with self._lock:
if agent_id not in self._agent_assignments:
return False
conn = _get_conn()
conn.execute(
"DELETE FROM agent_model_assignments WHERE agent_id = ?",
(agent_id,),
)
conn.commit()
conn.close()
with closing(_get_conn()) as conn:
conn.execute(
"DELETE FROM agent_model_assignments WHERE agent_id = ?",
(agent_id,),
)
conn.commit()
del self._agent_assignments[agent_id]
return True

View File

@@ -13,6 +13,7 @@ Default is always True. The owner changes this intentionally.
import sqlite3
import uuid
from contextlib import closing
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from pathlib import Path
@@ -96,80 +97,73 @@ def create_item(
created_at=datetime.now(UTC),
status="pending",
)
conn = _get_conn(db_path)
conn.execute(
"""
INSERT INTO approval_items
(id, title, description, proposed_action, impact, created_at, status)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
item.id,
item.title,
item.description,
item.proposed_action,
item.impact,
item.created_at.isoformat(),
item.status,
),
)
conn.commit()
conn.close()
with closing(_get_conn(db_path)) as conn:
conn.execute(
"""
INSERT INTO approval_items
(id, title, description, proposed_action, impact, created_at, status)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
item.id,
item.title,
item.description,
item.proposed_action,
item.impact,
item.created_at.isoformat(),
item.status,
),
)
conn.commit()
return item
def list_pending(db_path: Path = _DEFAULT_DB) -> list[ApprovalItem]:
"""Return all pending approval items, newest first."""
conn = _get_conn(db_path)
rows = conn.execute(
"SELECT * FROM approval_items WHERE status = 'pending' ORDER BY created_at DESC"
).fetchall()
conn.close()
with closing(_get_conn(db_path)) as conn:
rows = conn.execute(
"SELECT * FROM approval_items WHERE status = 'pending' ORDER BY created_at DESC"
).fetchall()
return [_row_to_item(r) for r in rows]
def list_all(db_path: Path = _DEFAULT_DB) -> list[ApprovalItem]:
"""Return all approval items regardless of status, newest first."""
conn = _get_conn(db_path)
rows = conn.execute("SELECT * FROM approval_items ORDER BY created_at DESC").fetchall()
conn.close()
with closing(_get_conn(db_path)) as conn:
rows = conn.execute("SELECT * FROM approval_items ORDER BY created_at DESC").fetchall()
return [_row_to_item(r) for r in rows]
def get_item(item_id: str, db_path: Path = _DEFAULT_DB) -> ApprovalItem | None:
conn = _get_conn(db_path)
row = conn.execute("SELECT * FROM approval_items WHERE id = ?", (item_id,)).fetchone()
conn.close()
with closing(_get_conn(db_path)) as conn:
row = conn.execute("SELECT * FROM approval_items WHERE id = ?", (item_id,)).fetchone()
return _row_to_item(row) if row else None
def approve(item_id: str, db_path: Path = _DEFAULT_DB) -> ApprovalItem | None:
"""Mark an approval item as approved."""
conn = _get_conn(db_path)
conn.execute("UPDATE approval_items SET status = 'approved' WHERE id = ?", (item_id,))
conn.commit()
conn.close()
with closing(_get_conn(db_path)) as conn:
conn.execute("UPDATE approval_items SET status = 'approved' WHERE id = ?", (item_id,))
conn.commit()
return get_item(item_id, db_path)
def reject(item_id: str, db_path: Path = _DEFAULT_DB) -> ApprovalItem | None:
"""Mark an approval item as rejected."""
conn = _get_conn(db_path)
conn.execute("UPDATE approval_items SET status = 'rejected' WHERE id = ?", (item_id,))
conn.commit()
conn.close()
with closing(_get_conn(db_path)) as conn:
conn.execute("UPDATE approval_items SET status = 'rejected' WHERE id = ?", (item_id,))
conn.commit()
return get_item(item_id, db_path)
def expire_old(db_path: Path = _DEFAULT_DB) -> int:
"""Auto-expire pending items older than EXPIRY_DAYS. Returns count removed."""
cutoff = (datetime.now(UTC) - timedelta(days=_EXPIRY_DAYS)).isoformat()
conn = _get_conn(db_path)
cursor = conn.execute(
"DELETE FROM approval_items WHERE status = 'pending' AND created_at < ?",
(cutoff,),
)
conn.commit()
count = cursor.rowcount
conn.close()
with closing(_get_conn(db_path)) as conn:
cursor = conn.execute(
"DELETE FROM approval_items WHERE status = 'pending' AND created_at < ?",
(cutoff,),
)
conn.commit()
count = cursor.rowcount
return count

View File

@@ -37,6 +37,7 @@ class RunResult:
"""Minimal Agno-compatible run result — carries the model's response text."""
content: str
confidence: float | None = None
def is_apple_silicon() -> bool:

View File

@@ -10,6 +10,7 @@ regenerates the briefing every 6 hours.
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from pathlib import Path
@@ -74,28 +75,26 @@ def _get_cache_conn(db_path: Path = _DEFAULT_DB) -> sqlite3.Connection:
def _save_briefing(briefing: Briefing, db_path: Path = _DEFAULT_DB) -> None:
conn = _get_cache_conn(db_path)
conn.execute(
"""
INSERT INTO briefings (generated_at, period_start, period_end, summary)
VALUES (?, ?, ?, ?)
""",
(
briefing.generated_at.isoformat(),
briefing.period_start.isoformat(),
briefing.period_end.isoformat(),
briefing.summary,
),
)
conn.commit()
conn.close()
with closing(_get_cache_conn(db_path)) as conn:
conn.execute(
"""
INSERT INTO briefings (generated_at, period_start, period_end, summary)
VALUES (?, ?, ?, ?)
""",
(
briefing.generated_at.isoformat(),
briefing.period_start.isoformat(),
briefing.period_end.isoformat(),
briefing.summary,
),
)
conn.commit()
def _load_latest(db_path: Path = _DEFAULT_DB) -> Briefing | None:
"""Load the most-recently cached briefing, or None if there is none."""
conn = _get_cache_conn(db_path)
row = conn.execute("SELECT * FROM briefings ORDER BY generated_at DESC LIMIT 1").fetchone()
conn.close()
with closing(_get_cache_conn(db_path)) as conn:
row = conn.execute("SELECT * FROM briefings ORDER BY generated_at DESC LIMIT 1").fetchone()
if row is None:
return None
return Briefing(
@@ -129,27 +128,25 @@ def _gather_swarm_summary(since: datetime) -> str:
return "No swarm activity recorded yet."
try:
conn = sqlite3.connect(str(swarm_db))
conn.row_factory = sqlite3.Row
with closing(sqlite3.connect(str(swarm_db))) as conn:
conn.row_factory = sqlite3.Row
since_iso = since.isoformat()
since_iso = since.isoformat()
completed = conn.execute(
"SELECT COUNT(*) as c FROM tasks WHERE status = 'completed' AND created_at > ?",
(since_iso,),
).fetchone()["c"]
completed = conn.execute(
"SELECT COUNT(*) as c FROM tasks WHERE status = 'completed' AND created_at > ?",
(since_iso,),
).fetchone()["c"]
failed = conn.execute(
"SELECT COUNT(*) as c FROM tasks WHERE status = 'failed' AND created_at > ?",
(since_iso,),
).fetchone()["c"]
failed = conn.execute(
"SELECT COUNT(*) as c FROM tasks WHERE status = 'failed' AND created_at > ?",
(since_iso,),
).fetchone()["c"]
agents = conn.execute(
"SELECT COUNT(*) as c FROM agents WHERE registered_at > ?",
(since_iso,),
).fetchone()["c"]
conn.close()
agents = conn.execute(
"SELECT COUNT(*) as c FROM agents WHERE registered_at > ?",
(since_iso,),
).fetchone()["c"]
parts = []
if completed:

128
src/timmy/confidence.py Normal file
View File

@@ -0,0 +1,128 @@
"""Confidence estimation for Timmy's responses.
Implements SOUL.md requirement: "When I am uncertain, I must say so in
proportion to my uncertainty."
This module provides heuristics to estimate confidence based on linguistic
signals in the response text. It measures uncertainty without modifying
the response content.
"""
import re
# Hedging words that indicate uncertainty
HEDGING_WORDS = [
    "i think",
    "maybe",
    "perhaps",
    "not sure",
    "might",
    "could be",
    "possibly",
    "i believe",
    "approximately",
    "roughly",
    "probably",
    "likely",
    "seems",
    "appears",
    "suggests",
    "i guess",
    "i suppose",
    "sort of",
    "kind of",
    "somewhat",
    "fairly",
    "relatively",
    "i'm not certain",
    "i am not certain",
    "uncertain",
    "unclear",
]

# Certainty words that indicate confidence
CERTAINTY_WORDS = [
    "i know",
    "definitely",
    "certainly",
    "the answer is",
    "specifically",
    "exactly",
    "absolutely",
    "without doubt",
    "i am certain",
    "i'm certain",
    "it is true that",
    "fact is",
    "in fact",
    "indeed",
    "undoubtedly",
    "clearly",
    "obviously",
    "conclusively",
]

# Very low confidence indicators (direct admissions of ignorance).
# These are searched in lower-cased text, so they are written in lower case
# only. Each is anchored with \b so it cannot match inside a longer word.
LOW_CONFIDENCE_PATTERNS = [
    r"\bi\s+(?:don't|do\s+not)\s+know\b",
    # "i am ..." and "i'm ..." are alternatives for the whole subject; the
    # contraction must not be preceded by a separate "i".
    r"\b(?:i\s+am|i'm)\s+(?:not\s+sure|unsure)\b",
    r"\bi\s+have\s+no\s+(?:idea|clue)\b",
    r"\bi\s+cannot\s+(?:say|tell|answer)\b",
    r"\bi\s+can't\s+(?:say|tell|answer)\b",
]


def _compile_phrase(phrase: str) -> re.Pattern[str]:
    """Compile *phrase* into a regex that matches it only as whole words.

    Words in the phrase may be separated by any run of whitespace, and the
    phrase must start and end on a word boundary, so "appears" does not
    match inside "disappears" and "fact is" does not match "artifact is".
    """
    words = (re.escape(word) for word in phrase.split())
    return re.compile(r"\b" + r"\s+".join(words) + r"\b")


# Compiled once at import time from the public tables above.
_HEDGING_RES = tuple(_compile_phrase(phrase) for phrase in HEDGING_WORDS)
_CERTAINTY_RES = tuple(_compile_phrase(phrase) for phrase in CERTAINTY_WORDS)
_LOW_CONFIDENCE_RES = tuple(re.compile(pattern) for pattern in LOW_CONFIDENCE_PATTERNS)


def estimate_confidence(text: str) -> float:
    """Estimate confidence level of a response based on linguistic signals.

    Starts from a neutral 0.5 (or 0.15 when the text directly admits
    ignorance), subtracts 0.1 for each distinct hedging phrase present, adds
    0.1 for each distinct certainty phrase present, adds 0.1 for answers of
    five words or fewer that are still above 0.3, and subtracts 0.15 when the
    text contains a question mark. The text itself is never modified.

    Args:
        text: The response text to analyze.

    Returns:
        A float between 0.0 (very uncertain) and 1.0 (very confident).
        Empty or whitespace-only text yields 0.0.
    """
    if not text or not text.strip():
        return 0.0

    # Lower-case once, and fold the typographic apostrophe to ASCII so that
    # contractions such as "don’t" and "I’m" match the tables above.
    text_lower = text.lower().strip().replace("\u2019", "'")

    # A direct admission of ignorance overrides the neutral starting point.
    admits_ignorance = any(rx.search(text_lower) for rx in _LOW_CONFIDENCE_RES)
    confidence = 0.15 if admits_ignorance else 0.5

    # Each phrase counts at most once, however often it occurs.
    hedging_count = sum(1 for rx in _HEDGING_RES if rx.search(text_lower))
    certainty_count = sum(1 for rx in _CERTAINTY_RES if rx.search(text_lower))

    # Each hedging phrase reduces confidence by 0.1;
    # each certainty phrase increases it by 0.1.
    confidence -= hedging_count * 0.1
    confidence += certainty_count * 0.1

    # Short factual answers get a small boost
    word_count = len(text.split())
    if word_count <= 5 and confidence > 0.3:
        confidence += 0.1

    # Questions in response indicate uncertainty
    if "?" in text:
        confidence -= 0.15

    # Clamp to valid range
    return max(0.0, min(1.0, confidence))

View File

@@ -25,6 +25,7 @@ import os
import shutil
import sqlite3
import uuid
from contextlib import closing
from datetime import datetime
from pathlib import Path
@@ -163,37 +164,36 @@ def _bridge_to_work_order(title: str, body: str, category: str) -> None:
try:
db_path = Path(settings.repo_root) / "data" / "work_orders.db"
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(db_path))
conn.execute(
"""CREATE TABLE IF NOT EXISTS work_orders (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
description TEXT DEFAULT '',
priority TEXT DEFAULT 'medium',
category TEXT DEFAULT 'suggestion',
submitter TEXT DEFAULT 'dashboard',
related_files TEXT DEFAULT '',
status TEXT DEFAULT 'submitted',
result TEXT DEFAULT '',
rejection_reason TEXT DEFAULT '',
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT
)"""
)
conn.execute(
"INSERT INTO work_orders (id, title, description, category, submitter, created_at) "
"VALUES (?, ?, ?, ?, ?, ?)",
(
str(uuid.uuid4()),
title,
body,
category,
"timmy-thinking",
datetime.utcnow().isoformat(),
),
)
conn.commit()
conn.close()
with closing(sqlite3.connect(str(db_path))) as conn:
conn.execute(
"""CREATE TABLE IF NOT EXISTS work_orders (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
description TEXT DEFAULT '',
priority TEXT DEFAULT 'medium',
category TEXT DEFAULT 'suggestion',
submitter TEXT DEFAULT 'dashboard',
related_files TEXT DEFAULT '',
status TEXT DEFAULT 'submitted',
result TEXT DEFAULT '',
rejection_reason TEXT DEFAULT '',
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT
)"""
)
conn.execute(
"INSERT INTO work_orders (id, title, description, category, submitter, created_at) "
"VALUES (?, ?, ?, ?, ?, ?)",
(
str(uuid.uuid4()),
title,
body,
category,
"timmy-thinking",
datetime.utcnow().isoformat(),
),
)
conn.commit()
except Exception as exc:
logger.debug("Work order bridge failed: %s", exc)

View File

@@ -15,6 +15,7 @@ import hashlib
import json
import logging
import sqlite3
from contextlib import closing
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
@@ -113,20 +114,19 @@ class SemanticMemory:
def _init_db(self) -> None:
"""Initialize SQLite with vector storage."""
self.db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(self.db_path))
conn.execute("""
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
source TEXT NOT NULL,
content TEXT NOT NULL,
embedding TEXT NOT NULL,
created_at TEXT NOT NULL,
source_hash TEXT NOT NULL
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
conn.commit()
conn.close()
with closing(sqlite3.connect(str(self.db_path))) as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
source TEXT NOT NULL,
content TEXT NOT NULL,
embedding TEXT NOT NULL,
created_at TEXT NOT NULL,
source_hash TEXT NOT NULL
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
conn.commit()
def index_file(self, filepath: Path) -> int:
"""Index a single file into semantic memory."""
@@ -136,39 +136,37 @@ class SemanticMemory:
content = filepath.read_text()
file_hash = hashlib.md5(content.encode()).hexdigest()
# Check if already indexed with same hash
conn = sqlite3.connect(str(self.db_path))
cursor = conn.execute(
"SELECT source_hash FROM chunks WHERE source = ? LIMIT 1", (str(filepath),)
)
existing = cursor.fetchone()
if existing and existing[0] == file_hash:
conn.close()
return 0 # Already indexed
# Delete old chunks for this file
conn.execute("DELETE FROM chunks WHERE source = ?", (str(filepath),))
# Split into chunks (paragraphs)
chunks = self._split_into_chunks(content)
# Index each chunk
now = datetime.now(UTC).isoformat()
for i, chunk_text in enumerate(chunks):
if len(chunk_text.strip()) < 20: # Skip tiny chunks
continue
chunk_id = f"{filepath.stem}_{i}"
embedding = embed_text(chunk_text)
conn.execute(
"""INSERT INTO chunks (id, source, content, embedding, created_at, source_hash)
VALUES (?, ?, ?, ?, ?, ?)""",
(chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash),
with closing(sqlite3.connect(str(self.db_path))) as conn:
# Check if already indexed with same hash
cursor = conn.execute(
"SELECT source_hash FROM chunks WHERE source = ? LIMIT 1", (str(filepath),)
)
existing = cursor.fetchone()
if existing and existing[0] == file_hash:
return 0 # Already indexed
conn.commit()
conn.close()
# Delete old chunks for this file
conn.execute("DELETE FROM chunks WHERE source = ?", (str(filepath),))
# Split into chunks (paragraphs)
chunks = self._split_into_chunks(content)
# Index each chunk
now = datetime.now(UTC).isoformat()
for i, chunk_text in enumerate(chunks):
if len(chunk_text.strip()) < 20: # Skip tiny chunks
continue
chunk_id = f"{filepath.stem}_{i}"
embedding = embed_text(chunk_text)
conn.execute(
"""INSERT INTO chunks (id, source, content, embedding, created_at, source_hash)
VALUES (?, ?, ?, ?, ?, ?)""",
(chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash),
)
conn.commit()
logger.info("SemanticMemory: Indexed %s (%d chunks)", filepath.name, len(chunks))
return len(chunks)
@@ -222,13 +220,11 @@ class SemanticMemory:
"""Search for relevant memory chunks."""
query_embedding = embed_text(query)
conn = sqlite3.connect(str(self.db_path))
conn.row_factory = sqlite3.Row
with closing(sqlite3.connect(str(self.db_path))) as conn:
conn.row_factory = sqlite3.Row
# Get all chunks (in production, use vector index)
rows = conn.execute("SELECT source, content, embedding FROM chunks").fetchall()
conn.close()
# Get all chunks (in production, use vector index)
rows = conn.execute("SELECT source, content, embedding FROM chunks").fetchall()
# Calculate similarities
scored = []
@@ -268,10 +264,9 @@ class SemanticMemory:
def stats(self) -> dict:
"""Get indexing statistics."""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.execute("SELECT COUNT(*), COUNT(DISTINCT source) FROM chunks")
total_chunks, total_files = cursor.fetchone()
conn.close()
with closing(sqlite3.connect(str(self.db_path))) as conn:
cursor = conn.execute("SELECT COUNT(*), COUNT(DISTINCT source) FROM chunks")
total_chunks, total_files = cursor.fetchone()
return {
"total_chunks": total_chunks,

View File

@@ -38,21 +38,23 @@ class SessionLogger:
# In-memory buffer
self._buffer: list[dict] = []
def record_message(self, role: str, content: str) -> None:
def record_message(self, role: str, content: str, confidence: float | None = None) -> None:
"""Record a user message.
Args:
role: "user" or "timmy"
content: The message content
confidence: Optional confidence score (0.0 to 1.0)
"""
self._buffer.append(
{
"type": "message",
"role": role,
"content": content,
"timestamp": datetime.now().isoformat(),
}
)
entry = {
"type": "message",
"role": role,
"content": content,
"timestamp": datetime.now().isoformat(),
}
if confidence is not None:
entry["confidence"] = confidence
self._buffer.append(entry)
def record_tool_call(self, tool_name: str, args: dict, result: str) -> None:
"""Record a tool call.

View File

@@ -21,6 +21,7 @@ import logging
import random
import sqlite3
import uuid
from contextlib import closing
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from difflib import SequenceMatcher
@@ -320,19 +321,17 @@ class ThinkingEngine:
def get_recent_thoughts(self, limit: int = 20) -> list[Thought]:
    """Retrieve the most recent thoughts, newest first.

    Args:
        limit: Maximum number of thoughts to return.

    Returns:
        Up to ``limit`` thoughts ordered by ``created_at`` descending.
    """
    # closing() guarantees the connection is released even if the query raises.
    with closing(_get_conn(self._db_path)) as conn:
        rows = conn.execute(
            "SELECT * FROM thoughts ORDER BY created_at DESC LIMIT ?",
            (limit,),
        ).fetchall()
    # Rows are fully fetched above, so converting after the close is safe.
    return [_row_to_thought(r) for r in rows]
def get_thought(self, thought_id: str) -> Thought | None:
    """Retrieve a single thought by ID.

    Args:
        thought_id: Value matched against the ``id`` column of ``thoughts``.

    Returns:
        The matching thought, or None when no row has that ID.
    """
    # closing() guarantees the connection is released even if the query raises.
    with closing(_get_conn(self._db_path)) as conn:
        row = conn.execute("SELECT * FROM thoughts WHERE id = ?", (thought_id,)).fetchone()
    return _row_to_thought(row) if row else None
def get_thought_chain(self, thought_id: str, max_depth: int = 20) -> list[Thought]:
@@ -342,26 +341,24 @@ class ThinkingEngine:
"""
chain = []
current_id: str | None = thought_id
conn = _get_conn(self._db_path)
for _ in range(max_depth):
if not current_id:
break
row = conn.execute("SELECT * FROM thoughts WHERE id = ?", (current_id,)).fetchone()
if not row:
break
chain.append(_row_to_thought(row))
current_id = row["parent_id"]
with closing(_get_conn(self._db_path)) as conn:
for _ in range(max_depth):
if not current_id:
break
row = conn.execute("SELECT * FROM thoughts WHERE id = ?", (current_id,)).fetchone()
if not row:
break
chain.append(_row_to_thought(row))
current_id = row["parent_id"]
conn.close()
chain.reverse() # Chronological order
return chain
def count_thoughts(self) -> int:
    """Return total number of stored thoughts."""
    # closing() guarantees the connection is released even if the query raises.
    with closing(_get_conn(self._db_path)) as conn:
        # Name-based access to "c" presumably relies on _get_conn setting a
        # row factory such as sqlite3.Row; confirm against _get_conn.
        count = conn.execute("SELECT COUNT(*) as c FROM thoughts").fetchone()["c"]
    return count
def prune_old_thoughts(self, keep_days: int = 90, keep_min: int = 200) -> int:
@@ -369,25 +366,23 @@ class ThinkingEngine:
Returns the number of deleted rows.
"""
conn = _get_conn(self._db_path)
try:
total = conn.execute("SELECT COUNT(*) as c FROM thoughts").fetchone()["c"]
if total <= keep_min:
with closing(_get_conn(self._db_path)) as conn:
try:
total = conn.execute("SELECT COUNT(*) as c FROM thoughts").fetchone()["c"]
if total <= keep_min:
return 0
cutoff = (datetime.now(UTC) - timedelta(days=keep_days)).isoformat()
cursor = conn.execute(
"DELETE FROM thoughts WHERE created_at < ? AND id NOT IN "
"(SELECT id FROM thoughts ORDER BY created_at DESC LIMIT ?)",
(cutoff, keep_min),
)
deleted = cursor.rowcount
conn.commit()
return deleted
except Exception as exc:
logger.warning("Thought pruning failed: %s", exc)
return 0
cutoff = (datetime.now(UTC) - timedelta(days=keep_days)).isoformat()
cursor = conn.execute(
"DELETE FROM thoughts WHERE created_at < ? AND id NOT IN "
"(SELECT id FROM thoughts ORDER BY created_at DESC LIMIT ?)",
(cutoff, keep_min),
)
deleted = cursor.rowcount
conn.commit()
return deleted
except Exception as exc:
logger.warning("Thought pruning failed: %s", exc)
return 0
finally:
conn.close()
# ── Private helpers ──────────────────────────────────────────────────
@@ -608,12 +603,11 @@ class ThinkingEngine:
# Thought count today (cheap DB query)
try:
today_start = now.replace(hour=0, minute=0, second=0, microsecond=0)
conn = _get_conn(self._db_path)
count = conn.execute(
"SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?",
(today_start.isoformat(),),
).fetchone()["c"]
conn.close()
with closing(_get_conn(self._db_path)) as conn:
count = conn.execute(
"SELECT COUNT(*) as c FROM thoughts WHERE created_at >= ?",
(today_start.isoformat(),),
).fetchone()["c"]
parts.append(f"Thoughts today: {count}")
except Exception as exc:
logger.debug("Thought count query failed: %s", exc)
@@ -966,16 +960,21 @@ class ThinkingEngine:
created_at=datetime.now(UTC).isoformat(),
)
conn = _get_conn(self._db_path)
conn.execute(
"""
INSERT INTO thoughts (id, content, seed_type, parent_id, created_at)
VALUES (?, ?, ?, ?, ?)
""",
(thought.id, thought.content, thought.seed_type, thought.parent_id, thought.created_at),
)
conn.commit()
conn.close()
with closing(_get_conn(self._db_path)) as conn:
conn.execute(
"""
INSERT INTO thoughts (id, content, seed_type, parent_id, created_at)
VALUES (?, ?, ?, ?, ?)
""",
(
thought.id,
thought.content,
thought.seed_type,
thought.parent_id,
thought.created_at,
),
)
conn.commit()
return thought
def _log_event(self, thought: Thought) -> None:

View File

@@ -472,26 +472,8 @@ def consult_grok(query: str) -> str:
return response
def create_full_toolkit(base_dir: str | Path | None = None):
"""Create a full toolkit with all available tools (for the orchestrator).
Includes: web search, file read/write, shell commands, python execution,
memory search for contextual recall, and Grok consultation.
"""
if not _AGNO_TOOLS_AVAILABLE:
# Return None when tools aren't available (tests)
return None
from timmy.tool_safety import DANGEROUS_TOOLS
toolkit = Toolkit(
name="full",
)
# Set requires_confirmation_tools AFTER construction (avoids agno WARNING
# about tools not yet registered) but BEFORE register() calls (so each
# Function gets requires_confirmation=True). Fixes #79.
toolkit.requires_confirmation_tools = list(DANGEROUS_TOOLS)
def _register_core_tools(toolkit: Toolkit, base_path: Path) -> None:
"""Register core execution and file tools."""
# Python execution
python_tools = PythonTools()
toolkit.register(python_tools.run_python_code, name="python")
@@ -500,10 +482,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
shell_tools = ShellTools()
toolkit.register(shell_tools.run_shell_command, name="shell")
# File operations - use repo_root from settings
from config import settings
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
# File operations
file_tools = FileTools(base_dir=base_path)
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
toolkit.register(file_tools.save_file, name="write_file")
@@ -512,7 +491,9 @@ def create_full_toolkit(base_dir: str | Path | None = None):
# Calculator — exact arithmetic (never let the LLM guess)
toolkit.register(calculator, name="calculator")
# Grok consultation — premium frontier reasoning (opt-in)
def _register_grok_tool(toolkit: Toolkit) -> None:
"""Register Grok consultation tool if available."""
try:
from timmy.backends import grok_available
@@ -523,7 +504,9 @@ def create_full_toolkit(base_dir: str | Path | None = None):
logger.warning("Tool execution failed (Grok registration): %s", exc)
logger.debug("Grok tool not available")
# Memory search, write, and forget — persistent recall across all channels
def _register_memory_tools(toolkit: Toolkit) -> None:
"""Register memory search, write, and forget tools."""
try:
from timmy.semantic_memory import memory_forget, memory_read, memory_search, memory_write
@@ -535,7 +518,9 @@ def create_full_toolkit(base_dir: str | Path | None = None):
logger.warning("Tool execution failed (Memory tools registration): %s", exc)
logger.debug("Memory tools not available")
# Agentic loop — background multi-step task execution
def _register_agentic_loop_tool(toolkit: Toolkit) -> None:
"""Register agentic loop tool for background multi-step task execution."""
try:
from timmy.agentic_loop import run_agentic_loop
@@ -582,7 +567,9 @@ def create_full_toolkit(base_dir: str | Path | None = None):
logger.warning("Tool execution failed (plan_and_execute registration): %s", exc)
logger.debug("plan_and_execute tool not available")
# System introspection - query runtime environment (sovereign self-knowledge)
def _register_introspection_tools(toolkit: Toolkit) -> None:
"""Register system introspection tools for runtime environment queries."""
try:
from timmy.tools_intro import (
check_ollama_health,
@@ -599,7 +586,9 @@ def create_full_toolkit(base_dir: str | Path | None = None):
logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
logger.debug("Introspection tools not available")
# Inter-agent delegation - dispatch tasks to swarm agents
def _register_delegation_tools(toolkit: Toolkit) -> None:
"""Register inter-agent delegation tools."""
try:
from timmy.tools_delegation import delegate_task, delegate_to_kimi, list_swarm_agents
@@ -610,6 +599,34 @@ def create_full_toolkit(base_dir: str | Path | None = None):
logger.warning("Tool execution failed (Delegation tools registration): %s", exc)
logger.debug("Delegation tools not available")
def create_full_toolkit(base_dir: str | Path | None = None):
"""Create a full toolkit with all available tools (for the orchestrator).
Includes: web search, file read/write, shell commands, python execution,
memory search for contextual recall, and Grok consultation.
"""
if not _AGNO_TOOLS_AVAILABLE:
# Return None when tools aren't available (tests)
return None
from timmy.tool_safety import DANGEROUS_TOOLS
toolkit = Toolkit(name="full")
# Set requires_confirmation_tools AFTER construction (avoids agno WARNING
# about tools not yet registered) but BEFORE register() calls (so each
# Function gets requires_confirmation=True). Fixes #79.
toolkit.requires_confirmation_tools = list(DANGEROUS_TOOLS)
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
_register_core_tools(toolkit, base_path)
_register_grok_tool(toolkit)
_register_memory_tools(toolkit)
_register_agentic_loop_tool(toolkit)
_register_introspection_tools(toolkit)
_register_delegation_tools(toolkit)
# Gitea issue management is now provided by the gitea-mcp server
# (wired in as MCPTools in agent.py, not registered here)
@@ -719,13 +736,9 @@ get_tools_for_persona = get_tools_for_agent
PERSONA_TOOLKITS = AGENT_TOOLKITS
def get_all_available_tools() -> dict[str, dict]:
"""Get a catalog of all available tools and their descriptions.
Returns:
Dict mapping tool categories to their tools and descriptions.
"""
catalog = {
def _core_tool_catalog() -> dict:
"""Return core file and execution tools catalog entries."""
return {
"shell": {
"name": "Shell Commands",
"description": "Execute shell commands (sandboxed)",
@@ -751,16 +764,39 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "List files in a directory",
"available_in": ["echo", "seer", "forge", "quill", "mace", "helm", "orchestrator"],
},
}
def _analysis_tool_catalog() -> dict:
"""Return analysis and calculation tools catalog entries."""
return {
"calculator": {
"name": "Calculator",
"description": "Evaluate mathematical expressions with exact results",
"available_in": ["orchestrator"],
},
}
def _ai_tool_catalog() -> dict:
"""Return AI assistant and frontier reasoning tools catalog entries."""
return {
"consult_grok": {
"name": "Consult Grok",
"description": "Premium frontier reasoning via xAI Grok (opt-in, Lightning-payable)",
"available_in": ["orchestrator"],
},
"aider": {
"name": "Aider AI Assistant",
"description": "Local AI coding assistant using Ollama (qwen3.5:latest or deepseek-coder)",
"available_in": ["forge", "orchestrator"],
},
}
def _introspection_tool_catalog() -> dict:
"""Return system introspection tools catalog entries."""
return {
"get_system_info": {
"name": "System Info",
"description": "Introspect runtime environment - discover model, Python version, config",
@@ -776,11 +812,12 @@ def get_all_available_tools() -> dict[str, dict]:
"description": "Check status of memory tiers (hot memory, vault)",
"available_in": ["orchestrator"],
},
"aider": {
"name": "Aider AI Assistant",
"description": "Local AI coding assistant using Ollama (qwen3.5:latest or deepseek-coder)",
"available_in": ["forge", "orchestrator"],
},
}
def _experiment_tool_catalog() -> dict:
"""Return ML experiment tools catalog entries."""
return {
"prepare_experiment": {
"name": "Prepare Experiment",
"description": "Clone autoresearch repo and run data preparation for ML experiments",
@@ -798,6 +835,9 @@ def get_all_available_tools() -> dict[str, dict]:
},
}
def _import_creative_catalogs(catalog: dict) -> None:
"""Import and merge creative tool catalogs from creative module."""
# ── Git tools ─────────────────────────────────────────────────────────────
try:
from creative.tools.git_tools import GIT_TOOL_CATALOG
@@ -876,4 +916,18 @@ def get_all_available_tools() -> dict[str, dict]:
except ImportError:
pass
def get_all_available_tools() -> dict[str, dict]:
    """Get a catalog of all available tools and their descriptions.

    Returns:
        Dict mapping tool categories to their tools and descriptions.
    """
    catalog: dict[str, dict] = {}
    # Merge the built-in groups in a fixed order; if two groups ever shared a
    # key, the later update would win.
    catalog.update(_core_tool_catalog())
    catalog.update(_analysis_tool_catalog())
    catalog.update(_ai_tool_catalog())
    catalog.update(_introspection_tool_catalog())
    catalog.update(_experiment_tool_catalog())
    # Called for its effect on `catalog`; its return value is not used.
    _import_creative_catalogs(catalog)
    return catalog

View File

@@ -6,7 +6,9 @@ being told about it in the system prompt.
import logging
import platform
import sqlite3
import sys
from contextlib import closing
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
@@ -174,19 +176,16 @@ def get_memory_status() -> dict[str, Any]:
# Tier 3: Semantic memory row count
tier3_info: dict[str, Any] = {"available": False}
try:
import sqlite3
sem_db = repo_root / "data" / "memory.db"
if sem_db.exists():
conn = sqlite3.connect(str(sem_db))
row = conn.execute(
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if row and row[0]:
count = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
tier3_info["available"] = True
tier3_info["vector_count"] = count[0] if count else 0
conn.close()
with closing(sqlite3.connect(str(sem_db))) as conn:
row = conn.execute(
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='chunks'"
).fetchone()
if row and row[0]:
count = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()
tier3_info["available"] = True
tier3_info["vector_count"] = count[0] if count else 0
except Exception as exc:
logger.debug("Memory status query failed: %s", exc)
pass
@@ -194,26 +193,23 @@ def get_memory_status() -> dict[str, Any]:
# Self-coding journal stats
journal_info: dict[str, Any] = {"available": False}
try:
import sqlite3 as _sqlite3
journal_db = repo_root / "data" / "self_coding.db"
if journal_db.exists():
conn = _sqlite3.connect(str(journal_db))
conn.row_factory = _sqlite3.Row
rows = conn.execute(
"SELECT outcome, COUNT(*) as cnt FROM modification_journal GROUP BY outcome"
).fetchall()
if rows:
counts = {r["outcome"]: r["cnt"] for r in rows}
total = sum(counts.values())
journal_info = {
"available": True,
"total_attempts": total,
"successes": counts.get("success", 0),
"failures": counts.get("failure", 0),
"success_rate": round(counts.get("success", 0) / total, 2) if total else 0,
}
conn.close()
with closing(sqlite3.connect(str(journal_db))) as conn:
conn.row_factory = sqlite3.Row
rows = conn.execute(
"SELECT outcome, COUNT(*) as cnt FROM modification_journal GROUP BY outcome"
).fetchall()
if rows:
counts = {r["outcome"]: r["cnt"] for r in rows}
total = sum(counts.values())
journal_info = {
"available": True,
"total_attempts": total,
"successes": counts.get("success", 0),
"failures": counts.get("failure", 0),
"success_rate": round(counts.get("success", 0) / total, 2) if total else 0,
}
except Exception as exc:
logger.debug("Journal stats query failed: %s", exc)
pass

View File

@@ -0,0 +1,128 @@
"""Tests for confidence estimation in src/timmy/confidence.py."""
from timmy.confidence import (
CERTAINTY_WORDS,
HEDGING_WORDS,
estimate_confidence,
)
class TestEstimateConfidence:
    """Test cases for estimate_confidence function."""

    def test_empty_string_returns_zero(self):
        """Empty string should return 0.0 confidence."""
        assert estimate_confidence("") == 0.0

    def test_whitespace_only_returns_zero(self):
        """Whitespace-only string should return 0.0 confidence."""
        assert estimate_confidence(" ") == 0.0

    def test_normal_factual_response(self):
        """Factual response should have at least moderate confidence."""
        result = estimate_confidence("Paris is the capital of France.")
        # 6 words doesn't get short-response boost, so the score should sit
        # at or above the base value.
        assert 0.5 <= result <= 1.0

    def test_i_dont_know_gives_very_low_confidence(self):
        """Direct admission of not knowing should give very low confidence."""
        result = estimate_confidence("I don't know the answer to that.")
        assert result <= 0.2

    def test_i_am_not_sure_gives_very_low_confidence(self):
        """Uncertainty admission should give very low confidence."""
        result = estimate_confidence("I am not sure about this.")
        assert result <= 0.2

    def test_hedging_words_reduce_confidence(self):
        """Hedging words should reduce confidence below base."""
        base = estimate_confidence("This is the answer.")
        hedged = estimate_confidence("I think this is the answer.")
        assert hedged < base

    def test_maybe_reduces_confidence(self):
        """The word 'maybe' should reduce confidence."""
        base = estimate_confidence("It will rain tomorrow.")
        hedged = estimate_confidence("Maybe it will rain tomorrow.")
        assert hedged < base

    def test_perhaps_reduces_confidence(self):
        """The word 'perhaps' should reduce confidence."""
        base = estimate_confidence("The solution is correct.")
        hedged = estimate_confidence("Perhaps the solution is correct.")
        assert hedged < base

    def test_certainty_words_increase_confidence(self):
        """Certainty words should increase confidence above base."""
        # Use longer sentence to avoid short-response boost confounding
        base = estimate_confidence("This is a longer sentence with more words.")
        certain = estimate_confidence(
            "I definitely know this is a longer sentence with more words."
        )
        assert certain > base

    def test_definitely_increases_confidence(self):
        """The word 'definitely' should increase confidence."""
        base = estimate_confidence("This will work.")
        certain = estimate_confidence("This will definitely work.")
        assert certain > base

    def test_question_reduces_confidence(self):
        """Questions in response should reduce confidence."""
        base = estimate_confidence("The value is 10.")
        questioning = estimate_confidence("The value is 10?")
        assert questioning < base

    def test_multiple_hedging_words_compound(self):
        """Multiple hedging words should compound to lower confidence."""
        text = "I think maybe this could be the answer, but I'm not sure."
        result = estimate_confidence(text)
        assert result < 0.4

    def test_output_always_in_valid_range(self):
        """Output should always be clamped to [0.0, 1.0]."""
        # Test with text that has many hedging words
        heavily_hedged = (
            "I think maybe perhaps possibly I believe this might could be approximately right."
        )
        result = estimate_confidence(heavily_hedged)
        assert 0.0 <= result <= 1.0

        # Test with text that has many certainty words
        heavily_certain = (
            "I know definitely certainly absolutely without doubt "
            "the answer is specifically exactly correct."
        )
        result = estimate_confidence(heavily_certain)
        assert 0.0 <= result <= 1.0

    def test_hedging_words_list_populated(self):
        """HEDGING_WORDS list should contain expected hedging phrases."""
        assert "i think" in HEDGING_WORDS
        assert "maybe" in HEDGING_WORDS
        assert "perhaps" in HEDGING_WORDS
        assert "not sure" in HEDGING_WORDS
        assert "possibly" in HEDGING_WORDS

    def test_certainty_words_list_populated(self):
        """CERTAINTY_WORDS list should contain expected certainty phrases."""
        assert "i know" in CERTAINTY_WORDS
        assert "definitely" in CERTAINTY_WORDS
        assert "certainly" in CERTAINTY_WORDS
        assert "the answer is" in CERTAINTY_WORDS

    def test_certainty_and_hedging_cancel(self):
        """Mix of certainty and hedging should balance out near base."""
        text = "I definitely think this is correct."
        result = estimate_confidence(text)
        # Should be near base (0.5) but hedging slightly stronger
        assert 0.3 <= result <= 0.7

    def test_i_have_no_idea_gives_very_low_confidence(self):
        """I have no idea should give very low confidence."""
        result = estimate_confidence("I have no idea what you're talking about.")
        assert result <= 0.2

    def test_short_response_gets_boost(self):
        """Very short factual responses should get confidence boost."""
        short = estimate_confidence("42")
        # Short factual should be higher due to boost
        assert short > 0.5