Compare commits
2 Commits
claude/iss
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
12b2e4129f | ||
| d697c3d93e |
@@ -549,12 +549,28 @@ async def lifespan(app: FastAPI):
|
||||
except Exception:
|
||||
logger.debug("Failed to register error recorder")
|
||||
|
||||
# Mark session start for sovereignty duration tracking
|
||||
try:
|
||||
from timmy.sovereignty import mark_session_start
|
||||
|
||||
mark_session_start()
|
||||
except Exception:
|
||||
logger.debug("Failed to mark sovereignty session start")
|
||||
|
||||
logger.info("✓ Dashboard ready for requests")
|
||||
|
||||
yield
|
||||
|
||||
await _shutdown_cleanup(bg_tasks, workshop_heartbeat)
|
||||
|
||||
# Generate and commit sovereignty session report
|
||||
try:
|
||||
from timmy.sovereignty import generate_and_commit_report
|
||||
|
||||
await generate_and_commit_report()
|
||||
except Exception as exc:
|
||||
logger.warning("Sovereignty report generation failed at shutdown: %s", exc)
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="Mission Control",
|
||||
|
||||
@@ -4,4 +4,23 @@ Tracks how much of each AI layer (perception, decision, narration)
|
||||
runs locally vs. calls out to an LLM. Feeds the sovereignty dashboard.
|
||||
|
||||
Refs: #954, #953
|
||||
|
||||
Session reporting: auto-generates markdown scorecards at session end
|
||||
and commits them to the Gitea repo for institutional memory.
|
||||
|
||||
Refs: #957 (Session Sovereignty Report Generator)
|
||||
"""
|
||||
|
||||
from timmy.sovereignty.session_report import (
|
||||
commit_report,
|
||||
generate_and_commit_report,
|
||||
generate_report,
|
||||
mark_session_start,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"generate_report",
|
||||
"commit_report",
|
||||
"generate_and_commit_report",
|
||||
"mark_session_start",
|
||||
]
|
||||
|
||||
442
src/timmy/sovereignty/session_report.py
Normal file
442
src/timmy/sovereignty/session_report.py
Normal file
@@ -0,0 +1,442 @@
|
||||
"""Session Sovereignty Report Generator.
|
||||
|
||||
Auto-generates a sovereignty scorecard at the end of each play session
|
||||
and commits it as a markdown file to the Gitea repo under
|
||||
``reports/sovereignty/``.
|
||||
|
||||
Report contents (per issue #957):
|
||||
- Session duration + game played
|
||||
- Total model calls by type (VLM, LLM, TTS, API)
|
||||
- Total cache/rule hits by type
|
||||
- New skills crystallized (placeholder — pending skill-tracking impl)
|
||||
- Sovereignty delta (change from session start → end)
|
||||
- Cost breakdown (actual API spend)
|
||||
- Per-layer sovereignty %: perception, decision, narration
|
||||
- Trend comparison vs previous session
|
||||
|
||||
Refs: #957 (Sovereignty P0) · #953 (The Sovereignty Loop)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from config import settings
|
||||
|
||||
# Optional module-level imports — degrade gracefully if unavailable at import time
|
||||
try:
|
||||
from timmy.session_logger import get_session_logger
|
||||
except Exception: # ImportError or circular import during early startup
|
||||
get_session_logger = None # type: ignore[assignment]
|
||||
|
||||
try:
|
||||
from infrastructure.sovereignty_metrics import GRADUATION_TARGETS, get_sovereignty_store
|
||||
except Exception:
|
||||
GRADUATION_TARGETS: dict = {} # type: ignore[assignment]
|
||||
get_sovereignty_store = None # type: ignore[assignment]
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level session start time; set by mark_session_start()
|
||||
_SESSION_START: datetime | None = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def mark_session_start() -> None:
|
||||
"""Record the session start wall-clock time.
|
||||
|
||||
Call once during application startup so ``generate_report()`` can
|
||||
compute accurate session durations.
|
||||
"""
|
||||
global _SESSION_START
|
||||
_SESSION_START = datetime.now(UTC)
|
||||
logger.debug("Sovereignty: session start recorded at %s", _SESSION_START.isoformat())
|
||||
|
||||
|
||||
def generate_report(session_id: str = "dashboard") -> str:
|
||||
"""Render a sovereignty scorecard as a markdown string.
|
||||
|
||||
Pulls from:
|
||||
- ``timmy.session_logger`` — message/tool-call/error counts
|
||||
- ``infrastructure.sovereignty_metrics`` — cache hit rate, API cost,
|
||||
graduation phase, and trend data
|
||||
|
||||
Args:
|
||||
session_id: The session identifier (default: "dashboard").
|
||||
|
||||
Returns:
|
||||
Markdown-formatted sovereignty report string.
|
||||
"""
|
||||
now = datetime.now(UTC)
|
||||
session_start = _SESSION_START or now
|
||||
duration_secs = (now - session_start).total_seconds()
|
||||
|
||||
session_data = _gather_session_data()
|
||||
sov_data = _gather_sovereignty_data()
|
||||
|
||||
return _render_markdown(now, session_id, duration_secs, session_data, sov_data)
|
||||
|
||||
|
||||
def commit_report(report_md: str, session_id: str = "dashboard") -> bool:
|
||||
"""Commit a sovereignty report to the Gitea repo.
|
||||
|
||||
Creates or updates ``reports/sovereignty/{date}_{session_id}.md``
|
||||
via the Gitea Contents API. Degrades gracefully: logs a warning
|
||||
and returns ``False`` if Gitea is unreachable or misconfigured.
|
||||
|
||||
Args:
|
||||
report_md: Markdown content to commit.
|
||||
session_id: Session identifier used in the filename.
|
||||
|
||||
Returns:
|
||||
``True`` on success, ``False`` on failure.
|
||||
"""
|
||||
if not settings.gitea_enabled:
|
||||
logger.info("Sovereignty: Gitea disabled — skipping report commit")
|
||||
return False
|
||||
|
||||
if not settings.gitea_token:
|
||||
logger.warning("Sovereignty: no Gitea token — skipping report commit")
|
||||
return False
|
||||
|
||||
date_str = datetime.now(UTC).strftime("%Y-%m-%d")
|
||||
file_path = f"reports/sovereignty/{date_str}_{session_id}.md"
|
||||
url = f"{settings.gitea_url}/api/v1/repos/{settings.gitea_repo}/contents/{file_path}"
|
||||
headers = {
|
||||
"Authorization": f"token {settings.gitea_token}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
encoded_content = base64.b64encode(report_md.encode()).decode()
|
||||
commit_message = (
|
||||
f"report: sovereignty session {session_id} ({date_str})\n\n"
|
||||
f"Auto-generated by Timmy. Refs #957"
|
||||
)
|
||||
payload: dict[str, Any] = {
|
||||
"message": commit_message,
|
||||
"content": encoded_content,
|
||||
}
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=10.0) as client:
|
||||
# Fetch existing file SHA so we can update rather than create
|
||||
check = client.get(url, headers=headers)
|
||||
if check.status_code == 200:
|
||||
existing = check.json()
|
||||
payload["sha"] = existing.get("sha", "")
|
||||
|
||||
resp = client.put(url, headers=headers, json=payload)
|
||||
resp.raise_for_status()
|
||||
|
||||
logger.info("Sovereignty: report committed to %s", file_path)
|
||||
return True
|
||||
|
||||
except httpx.HTTPStatusError as exc:
|
||||
logger.warning(
|
||||
"Sovereignty: commit failed (HTTP %s): %s",
|
||||
exc.response.status_code,
|
||||
exc,
|
||||
)
|
||||
return False
|
||||
except Exception as exc:
|
||||
logger.warning("Sovereignty: commit failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
async def generate_and_commit_report(session_id: str = "dashboard") -> bool:
|
||||
"""Generate and commit a sovereignty report for the current session.
|
||||
|
||||
Primary entry point — call at session end / application shutdown.
|
||||
Wraps the synchronous ``commit_report`` call in ``asyncio.to_thread``
|
||||
so it does not block the event loop.
|
||||
|
||||
Args:
|
||||
session_id: The session identifier.
|
||||
|
||||
Returns:
|
||||
``True`` if the report was generated and committed successfully.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
try:
|
||||
report_md = generate_report(session_id)
|
||||
logger.info("Sovereignty: report generated (%d chars)", len(report_md))
|
||||
committed = await asyncio.to_thread(commit_report, report_md, session_id)
|
||||
return committed
|
||||
except Exception as exc:
|
||||
logger.warning("Sovereignty: report generation failed: %s", exc)
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _format_duration(seconds: float) -> str:
|
||||
"""Format a duration in seconds as a human-readable string."""
|
||||
total = int(seconds)
|
||||
hours, remainder = divmod(total, 3600)
|
||||
minutes, secs = divmod(remainder, 60)
|
||||
if hours:
|
||||
return f"{hours}h {minutes}m {secs}s"
|
||||
if minutes:
|
||||
return f"{minutes}m {secs}s"
|
||||
return f"{secs}s"
|
||||
|
||||
|
||||
def _gather_session_data() -> dict[str, Any]:
|
||||
"""Pull session statistics from the session logger.
|
||||
|
||||
Returns a dict with:
|
||||
- ``user_messages``, ``timmy_messages``, ``tool_calls``, ``errors``
|
||||
- ``tool_call_breakdown``: dict[tool_name, count]
|
||||
"""
|
||||
default: dict[str, Any] = {
|
||||
"user_messages": 0,
|
||||
"timmy_messages": 0,
|
||||
"tool_calls": 0,
|
||||
"errors": 0,
|
||||
"tool_call_breakdown": {},
|
||||
}
|
||||
|
||||
try:
|
||||
if get_session_logger is None:
|
||||
return default
|
||||
sl = get_session_logger()
|
||||
sl.flush()
|
||||
|
||||
# Read today's session file directly for accurate counts
|
||||
if not sl.session_file.exists():
|
||||
return default
|
||||
|
||||
entries: list[dict] = []
|
||||
with open(sl.session_file) as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
try:
|
||||
entries.append(json.loads(line))
|
||||
except json.JSONDecodeError:
|
||||
continue
|
||||
|
||||
tool_breakdown: dict[str, int] = {}
|
||||
user_msgs = timmy_msgs = tool_calls = errors = 0
|
||||
|
||||
for entry in entries:
|
||||
etype = entry.get("type")
|
||||
if etype == "message":
|
||||
if entry.get("role") == "user":
|
||||
user_msgs += 1
|
||||
elif entry.get("role") == "timmy":
|
||||
timmy_msgs += 1
|
||||
elif etype == "tool_call":
|
||||
tool_calls += 1
|
||||
tool_name = entry.get("tool", "unknown")
|
||||
tool_breakdown[tool_name] = tool_breakdown.get(tool_name, 0) + 1
|
||||
elif etype == "error":
|
||||
errors += 1
|
||||
|
||||
return {
|
||||
"user_messages": user_msgs,
|
||||
"timmy_messages": timmy_msgs,
|
||||
"tool_calls": tool_calls,
|
||||
"errors": errors,
|
||||
"tool_call_breakdown": tool_breakdown,
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("Sovereignty: failed to gather session data: %s", exc)
|
||||
return default
|
||||
|
||||
|
||||
def _gather_sovereignty_data() -> dict[str, Any]:
|
||||
"""Pull sovereignty metrics from the SQLite store.
|
||||
|
||||
Returns a dict with:
|
||||
- ``metrics``: summary from ``SovereigntyMetricsStore.get_summary()``
|
||||
- ``deltas``: per-metric start/end values within recent history window
|
||||
- ``previous_session``: most recent prior value for each metric
|
||||
"""
|
||||
try:
|
||||
if get_sovereignty_store is None:
|
||||
return {"metrics": {}, "deltas": {}, "previous_session": {}}
|
||||
store = get_sovereignty_store()
|
||||
summary = store.get_summary()
|
||||
|
||||
deltas: dict[str, dict[str, Any]] = {}
|
||||
previous_session: dict[str, float | None] = {}
|
||||
|
||||
for metric_type in GRADUATION_TARGETS:
|
||||
history = store.get_latest(metric_type, limit=10)
|
||||
if len(history) >= 2:
|
||||
deltas[metric_type] = {
|
||||
"start": history[-1]["value"],
|
||||
"end": history[0]["value"],
|
||||
}
|
||||
previous_session[metric_type] = history[1]["value"]
|
||||
elif len(history) == 1:
|
||||
deltas[metric_type] = {"start": history[0]["value"], "end": history[0]["value"]}
|
||||
previous_session[metric_type] = None
|
||||
else:
|
||||
deltas[metric_type] = {"start": None, "end": None}
|
||||
previous_session[metric_type] = None
|
||||
|
||||
return {
|
||||
"metrics": summary,
|
||||
"deltas": deltas,
|
||||
"previous_session": previous_session,
|
||||
}
|
||||
|
||||
except Exception as exc:
|
||||
logger.warning("Sovereignty: failed to gather sovereignty data: %s", exc)
|
||||
return {"metrics": {}, "deltas": {}, "previous_session": {}}
|
||||
|
||||
|
||||
def _render_markdown(
|
||||
now: datetime,
|
||||
session_id: str,
|
||||
duration_secs: float,
|
||||
session_data: dict[str, Any],
|
||||
sov_data: dict[str, Any],
|
||||
) -> str:
|
||||
"""Assemble the full sovereignty report in markdown."""
|
||||
lines: list[str] = []
|
||||
|
||||
# Header
|
||||
lines += [
|
||||
"# Sovereignty Session Report",
|
||||
"",
|
||||
f"**Session ID:** `{session_id}` ",
|
||||
f"**Date:** {now.strftime('%Y-%m-%d')} ",
|
||||
f"**Duration:** {_format_duration(duration_secs)} ",
|
||||
f"**Generated:** {now.isoformat()}",
|
||||
"",
|
||||
"---",
|
||||
"",
|
||||
]
|
||||
|
||||
# Session activity
|
||||
lines += [
|
||||
"## Session Activity",
|
||||
"",
|
||||
"| Metric | Count |",
|
||||
"|--------|-------|",
|
||||
f"| User messages | {session_data['user_messages']} |",
|
||||
f"| Timmy responses | {session_data['timmy_messages']} |",
|
||||
f"| Tool calls | {session_data['tool_calls']} |",
|
||||
f"| Errors | {session_data['errors']} |",
|
||||
"",
|
||||
]
|
||||
|
||||
tool_breakdown = session_data.get("tool_call_breakdown", {})
|
||||
if tool_breakdown:
|
||||
lines += ["### Model Calls by Tool", ""]
|
||||
for tool_name, count in sorted(tool_breakdown.items(), key=lambda x: -x[1]):
|
||||
lines.append(f"- `{tool_name}`: {count}")
|
||||
lines.append("")
|
||||
|
||||
# Sovereignty scorecard
|
||||
|
||||
lines += [
|
||||
"## Sovereignty Scorecard",
|
||||
"",
|
||||
"| Metric | Current | Target (graduation) | Phase |",
|
||||
"|--------|---------|---------------------|-------|",
|
||||
]
|
||||
|
||||
for metric_type, data in sov_data["metrics"].items():
|
||||
current = data.get("current")
|
||||
current_str = f"{current:.4f}" if current is not None else "N/A"
|
||||
grad_target = GRADUATION_TARGETS.get(metric_type, {}).get("graduation")
|
||||
grad_str = f"{grad_target:.4f}" if isinstance(grad_target, (int, float)) else "N/A"
|
||||
phase = data.get("phase", "unknown")
|
||||
lines.append(f"| {metric_type} | {current_str} | {grad_str} | {phase} |")
|
||||
|
||||
lines += ["", "### Sovereignty Delta (This Session)", ""]
|
||||
|
||||
for metric_type, delta_info in sov_data.get("deltas", {}).items():
|
||||
start_val = delta_info.get("start")
|
||||
end_val = delta_info.get("end")
|
||||
if start_val is not None and end_val is not None:
|
||||
diff = end_val - start_val
|
||||
sign = "+" if diff >= 0 else ""
|
||||
lines.append(
|
||||
f"- **{metric_type}**: {start_val:.4f} → {end_val:.4f} ({sign}{diff:.4f})"
|
||||
)
|
||||
else:
|
||||
lines.append(f"- **{metric_type}**: N/A (no data recorded)")
|
||||
|
||||
# Cost breakdown
|
||||
lines += ["", "## Cost Breakdown", ""]
|
||||
api_cost_data = sov_data["metrics"].get("api_cost", {})
|
||||
current_cost = api_cost_data.get("current")
|
||||
if current_cost is not None:
|
||||
lines.append(f"- **Total API spend (latest recorded):** ${current_cost:.4f}")
|
||||
else:
|
||||
lines.append("- **Total API spend:** N/A (no data recorded)")
|
||||
lines.append("")
|
||||
|
||||
# Per-layer sovereignty
|
||||
lines += [
|
||||
"## Per-Layer Sovereignty",
|
||||
"",
|
||||
"| Layer | Sovereignty % |",
|
||||
"|-------|--------------|",
|
||||
"| Perception (VLM) | N/A |",
|
||||
"| Decision (LLM) | N/A |",
|
||||
"| Narration (TTS) | N/A |",
|
||||
"",
|
||||
"> Per-layer tracking requires instrumented inference calls. See #957.",
|
||||
"",
|
||||
]
|
||||
|
||||
# Skills crystallized
|
||||
lines += [
|
||||
"## Skills Crystallized",
|
||||
"",
|
||||
"_Skill crystallization tracking not yet implemented. See #957._",
|
||||
"",
|
||||
]
|
||||
|
||||
# Trend vs previous session
|
||||
lines += ["## Trend vs Previous Session", ""]
|
||||
prev_data = sov_data.get("previous_session", {})
|
||||
has_prev = any(v is not None for v in prev_data.values())
|
||||
|
||||
if has_prev:
|
||||
lines += [
|
||||
"| Metric | Previous | Current | Change |",
|
||||
"|--------|----------|---------|--------|",
|
||||
]
|
||||
for metric_type, curr_info in sov_data["metrics"].items():
|
||||
curr_val = curr_info.get("current")
|
||||
prev_val = prev_data.get(metric_type)
|
||||
curr_str = f"{curr_val:.4f}" if curr_val is not None else "N/A"
|
||||
prev_str = f"{prev_val:.4f}" if prev_val is not None else "N/A"
|
||||
if curr_val is not None and prev_val is not None:
|
||||
diff = curr_val - prev_val
|
||||
sign = "+" if diff >= 0 else ""
|
||||
change_str = f"{sign}{diff:.4f}"
|
||||
else:
|
||||
change_str = "N/A"
|
||||
lines.append(f"| {metric_type} | {prev_str} | {curr_str} | {change_str} |")
|
||||
lines.append("")
|
||||
else:
|
||||
lines += ["_No previous session data available for comparison._", ""]
|
||||
|
||||
# Footer
|
||||
lines += [
|
||||
"---",
|
||||
"_Auto-generated by Timmy · Session Sovereignty Report · Refs: #957_",
|
||||
]
|
||||
|
||||
return "\n".join(lines)
|
||||
94
src/timmy/tools/__init__.py
Normal file
94
src/timmy/tools/__init__.py
Normal file
@@ -0,0 +1,94 @@
|
||||
"""Tool integration for the agent swarm.
|
||||
|
||||
Provides agents with capabilities for:
|
||||
- File read/write (local filesystem)
|
||||
- Shell command execution (sandboxed)
|
||||
- Python code execution
|
||||
- Git operations
|
||||
- Image / Music / Video generation (creative pipeline)
|
||||
|
||||
Tools are assigned to agents based on their specialties.
|
||||
|
||||
Sub-modules:
|
||||
- _base: shared types, tracking state
|
||||
- file_tools: file-operation toolkit factories (Echo, Quill, Seer)
|
||||
- system_tools: calculator, AI tools, code/devops toolkit factories
|
||||
- _registry: full toolkit construction, agent registry, tool catalog
|
||||
"""
|
||||
|
||||
# Re-export everything for backward compatibility — callers that do
|
||||
# ``from timmy.tools import <symbol>`` continue to work unchanged.
|
||||
|
||||
from timmy.tools._base import (
|
||||
AgentTools,
|
||||
PersonaTools,
|
||||
ToolStats,
|
||||
_AGNO_TOOLS_AVAILABLE,
|
||||
_ImportError,
|
||||
_TOOL_USAGE,
|
||||
_track_tool_usage,
|
||||
get_tool_stats,
|
||||
)
|
||||
from timmy.tools._registry import (
|
||||
AGENT_TOOLKITS,
|
||||
PERSONA_TOOLKITS,
|
||||
_create_stub_toolkit,
|
||||
_merge_catalog,
|
||||
create_experiment_tools,
|
||||
create_full_toolkit,
|
||||
get_all_available_tools,
|
||||
get_tools_for_agent,
|
||||
get_tools_for_persona,
|
||||
)
|
||||
from timmy.tools.file_tools import (
|
||||
_make_smart_read_file,
|
||||
create_data_tools,
|
||||
create_research_tools,
|
||||
create_writing_tools,
|
||||
)
|
||||
from timmy.tools.system_tools import (
|
||||
_safe_eval,
|
||||
calculator,
|
||||
consult_grok,
|
||||
create_aider_tool,
|
||||
create_code_tools,
|
||||
create_devops_tools,
|
||||
create_security_tools,
|
||||
web_fetch,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# _base
|
||||
"AgentTools",
|
||||
"PersonaTools",
|
||||
"ToolStats",
|
||||
"_AGNO_TOOLS_AVAILABLE",
|
||||
"_ImportError",
|
||||
"_TOOL_USAGE",
|
||||
"_track_tool_usage",
|
||||
"get_tool_stats",
|
||||
# file_tools
|
||||
"_make_smart_read_file",
|
||||
"create_data_tools",
|
||||
"create_research_tools",
|
||||
"create_writing_tools",
|
||||
# system_tools
|
||||
"_safe_eval",
|
||||
"calculator",
|
||||
"consult_grok",
|
||||
"create_aider_tool",
|
||||
"create_code_tools",
|
||||
"create_devops_tools",
|
||||
"create_security_tools",
|
||||
"web_fetch",
|
||||
# _registry
|
||||
"AGENT_TOOLKITS",
|
||||
"PERSONA_TOOLKITS",
|
||||
"_create_stub_toolkit",
|
||||
"_merge_catalog",
|
||||
"create_experiment_tools",
|
||||
"create_full_toolkit",
|
||||
"get_all_available_tools",
|
||||
"get_tools_for_agent",
|
||||
"get_tools_for_persona",
|
||||
]
|
||||
90
src/timmy/tools/_base.py
Normal file
90
src/timmy/tools/_base.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""Base types, shared state, and tracking for the Timmy tool system."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Lazy imports to handle test mocking
|
||||
_ImportError = None
|
||||
try:
|
||||
from agno.tools import Toolkit
|
||||
from agno.tools.file import FileTools
|
||||
from agno.tools.python import PythonTools
|
||||
from agno.tools.shell import ShellTools
|
||||
|
||||
_AGNO_TOOLS_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
_AGNO_TOOLS_AVAILABLE = False
|
||||
_ImportError = e
|
||||
|
||||
# Track tool usage stats
|
||||
_TOOL_USAGE: dict[str, list[dict]] = {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolStats:
|
||||
"""Statistics for a single tool."""
|
||||
|
||||
tool_name: str
|
||||
call_count: int = 0
|
||||
last_used: str | None = None
|
||||
errors: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentTools:
|
||||
"""Tools assigned to an agent."""
|
||||
|
||||
agent_id: str
|
||||
agent_name: str
|
||||
toolkit: "Toolkit"
|
||||
available_tools: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# Backward-compat alias
|
||||
PersonaTools = AgentTools
|
||||
|
||||
|
||||
def _track_tool_usage(agent_id: str, tool_name: str, success: bool = True) -> None:
|
||||
"""Track tool usage for analytics."""
|
||||
if agent_id not in _TOOL_USAGE:
|
||||
_TOOL_USAGE[agent_id] = []
|
||||
_TOOL_USAGE[agent_id].append(
|
||||
{
|
||||
"tool": tool_name,
|
||||
"timestamp": datetime.now(UTC).isoformat(),
|
||||
"success": success,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def get_tool_stats(agent_id: str | None = None) -> dict:
|
||||
"""Get tool usage statistics.
|
||||
|
||||
Args:
|
||||
agent_id: Optional agent ID to filter by. If None, returns stats for all agents.
|
||||
|
||||
Returns:
|
||||
Dict with tool usage statistics.
|
||||
"""
|
||||
if agent_id:
|
||||
usage = _TOOL_USAGE.get(agent_id, [])
|
||||
return {
|
||||
"agent_id": agent_id,
|
||||
"total_calls": len(usage),
|
||||
"tools_used": list(set(u["tool"] for u in usage)),
|
||||
"recent_calls": usage[-10:] if usage else [],
|
||||
}
|
||||
|
||||
# Return stats for all agents
|
||||
all_stats = {}
|
||||
for aid, usage in _TOOL_USAGE.items():
|
||||
all_stats[aid] = {
|
||||
"total_calls": len(usage),
|
||||
"tools_used": list(set(u["tool"] for u in usage)),
|
||||
}
|
||||
return all_stats
|
||||
@@ -1,532 +1,48 @@
|
||||
"""Tool integration for the agent swarm.
|
||||
"""Tool registry, full toolkit construction, and tool catalog.
|
||||
|
||||
Provides agents with capabilities for:
|
||||
- File read/write (local filesystem)
|
||||
- Shell command execution (sandboxed)
|
||||
- Python code execution
|
||||
- Git operations
|
||||
- Image / Music / Video generation (creative pipeline)
|
||||
|
||||
Tools are assigned to agents based on their specialties.
|
||||
Provides:
|
||||
- Internal _register_* helpers for wiring tools into toolkits
|
||||
- create_full_toolkit (orchestrator toolkit)
|
||||
- create_experiment_tools (Lab agent toolkit)
|
||||
- AGENT_TOOLKITS / get_tools_for_agent registry
|
||||
- get_all_available_tools catalog
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import logging
|
||||
import math
|
||||
from collections.abc import Callable
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
from config import settings
|
||||
from timmy.tools._base import (
|
||||
_AGNO_TOOLS_AVAILABLE,
|
||||
_ImportError,
|
||||
FileTools,
|
||||
PythonTools,
|
||||
ShellTools,
|
||||
Toolkit,
|
||||
)
|
||||
from timmy.tools.file_tools import (
|
||||
_make_smart_read_file,
|
||||
create_data_tools,
|
||||
create_research_tools,
|
||||
create_writing_tools,
|
||||
)
|
||||
from timmy.tools.system_tools import (
|
||||
calculator,
|
||||
consult_grok,
|
||||
create_code_tools,
|
||||
create_devops_tools,
|
||||
create_security_tools,
|
||||
web_fetch,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Max characters of user query included in Lightning invoice memo
|
||||
_INVOICE_MEMO_MAX_LEN = 50
|
||||
|
||||
# Lazy imports to handle test mocking
|
||||
_ImportError = None
|
||||
try:
|
||||
from agno.tools import Toolkit
|
||||
from agno.tools.file import FileTools
|
||||
from agno.tools.python import PythonTools
|
||||
from agno.tools.shell import ShellTools
|
||||
|
||||
_AGNO_TOOLS_AVAILABLE = True
|
||||
except ImportError as e:
|
||||
_AGNO_TOOLS_AVAILABLE = False
|
||||
_ImportError = e
|
||||
|
||||
# Track tool usage stats
|
||||
_TOOL_USAGE: dict[str, list[dict]] = {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolStats:
|
||||
"""Statistics for a single tool."""
|
||||
|
||||
tool_name: str
|
||||
call_count: int = 0
|
||||
last_used: str | None = None
|
||||
errors: int = 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class AgentTools:
|
||||
"""Tools assigned to an agent."""
|
||||
|
||||
agent_id: str
|
||||
agent_name: str
|
||||
toolkit: Toolkit
|
||||
available_tools: list[str] = field(default_factory=list)
|
||||
|
||||
|
||||
# Backward-compat alias
|
||||
PersonaTools = AgentTools
|
||||
|
||||
|
||||
def _track_tool_usage(agent_id: str, tool_name: str, success: bool = True) -> None:
|
||||
"""Track tool usage for analytics."""
|
||||
if agent_id not in _TOOL_USAGE:
|
||||
_TOOL_USAGE[agent_id] = []
|
||||
_TOOL_USAGE[agent_id].append(
|
||||
{
|
||||
"tool": tool_name,
|
||||
"timestamp": datetime.now(UTC).isoformat(),
|
||||
"success": success,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
def get_tool_stats(agent_id: str | None = None) -> dict:
|
||||
"""Get tool usage statistics.
|
||||
|
||||
Args:
|
||||
agent_id: Optional agent ID to filter by. If None, returns stats for all agents.
|
||||
|
||||
Returns:
|
||||
Dict with tool usage statistics.
|
||||
"""
|
||||
if agent_id:
|
||||
usage = _TOOL_USAGE.get(agent_id, [])
|
||||
return {
|
||||
"agent_id": agent_id,
|
||||
"total_calls": len(usage),
|
||||
"tools_used": list(set(u["tool"] for u in usage)),
|
||||
"recent_calls": usage[-10:] if usage else [],
|
||||
}
|
||||
|
||||
# Return stats for all agents
|
||||
all_stats = {}
|
||||
for aid, usage in _TOOL_USAGE.items():
|
||||
all_stats[aid] = {
|
||||
"total_calls": len(usage),
|
||||
"tools_used": list(set(u["tool"] for u in usage)),
|
||||
}
|
||||
return all_stats
|
||||
|
||||
|
||||
def _safe_eval(node, allowed_names: dict):
|
||||
"""Walk an AST and evaluate only safe numeric operations."""
|
||||
if isinstance(node, ast.Expression):
|
||||
return _safe_eval(node.body, allowed_names)
|
||||
if isinstance(node, ast.Constant):
|
||||
if isinstance(node.value, (int, float, complex)):
|
||||
return node.value
|
||||
raise ValueError(f"Unsupported constant: {node.value!r}")
|
||||
if isinstance(node, ast.UnaryOp):
|
||||
operand = _safe_eval(node.operand, allowed_names)
|
||||
if isinstance(node.op, ast.UAdd):
|
||||
return +operand
|
||||
if isinstance(node.op, ast.USub):
|
||||
return -operand
|
||||
raise ValueError(f"Unsupported unary op: {type(node.op).__name__}")
|
||||
if isinstance(node, ast.BinOp):
|
||||
left = _safe_eval(node.left, allowed_names)
|
||||
right = _safe_eval(node.right, allowed_names)
|
||||
ops = {
|
||||
ast.Add: lambda a, b: a + b,
|
||||
ast.Sub: lambda a, b: a - b,
|
||||
ast.Mult: lambda a, b: a * b,
|
||||
ast.Div: lambda a, b: a / b,
|
||||
ast.FloorDiv: lambda a, b: a // b,
|
||||
ast.Mod: lambda a, b: a % b,
|
||||
ast.Pow: lambda a, b: a**b,
|
||||
}
|
||||
op_fn = ops.get(type(node.op))
|
||||
if op_fn is None:
|
||||
raise ValueError(f"Unsupported binary op: {type(node.op).__name__}")
|
||||
return op_fn(left, right)
|
||||
if isinstance(node, ast.Name):
|
||||
if node.id in allowed_names:
|
||||
return allowed_names[node.id]
|
||||
raise ValueError(f"Unknown name: {node.id!r}")
|
||||
if isinstance(node, ast.Attribute):
|
||||
value = _safe_eval(node.value, allowed_names)
|
||||
# Only allow attribute access on the math module
|
||||
if value is math:
|
||||
attr = getattr(math, node.attr, None)
|
||||
if attr is not None:
|
||||
return attr
|
||||
raise ValueError(f"Attribute access not allowed: .{node.attr}")
|
||||
if isinstance(node, ast.Call):
|
||||
func = _safe_eval(node.func, allowed_names)
|
||||
if not callable(func):
|
||||
raise ValueError(f"Not callable: {func!r}")
|
||||
args = [_safe_eval(a, allowed_names) for a in node.args]
|
||||
kwargs = {kw.arg: _safe_eval(kw.value, allowed_names) for kw in node.keywords}
|
||||
return func(*args, **kwargs)
|
||||
raise ValueError(f"Unsupported syntax: {type(node).__name__}")
|
||||
|
||||
|
||||
def calculator(expression: str) -> str:
|
||||
"""Evaluate a mathematical expression and return the exact result.
|
||||
|
||||
Use this tool for ANY arithmetic: multiplication, division, square roots,
|
||||
exponents, percentages, logarithms, trigonometry, etc.
|
||||
|
||||
Args:
|
||||
expression: A valid Python math expression, e.g. '347 * 829',
|
||||
'math.sqrt(17161)', '2**10', 'math.log(100, 10)'.
|
||||
|
||||
Returns:
|
||||
The exact result as a string.
|
||||
"""
|
||||
allowed_names = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
|
||||
allowed_names["math"] = math
|
||||
allowed_names["abs"] = abs
|
||||
allowed_names["round"] = round
|
||||
allowed_names["min"] = min
|
||||
allowed_names["max"] = max
|
||||
try:
|
||||
tree = ast.parse(expression, mode="eval")
|
||||
result = _safe_eval(tree, allowed_names)
|
||||
return str(result)
|
||||
except Exception as e: # broad catch intentional: arbitrary code execution
|
||||
return f"Error evaluating '{expression}': {e}"
|
||||
|
||||
|
||||
def _make_smart_read_file(file_tools: FileTools) -> Callable:
|
||||
"""Wrap FileTools.read_file so directories auto-list their contents.
|
||||
|
||||
When the user (or the LLM) passes a directory path to read_file,
|
||||
the raw Agno implementation throws an IsADirectoryError. This
|
||||
wrapper detects that case, lists the directory entries, and returns
|
||||
a helpful message so the model can pick the right file on its own.
|
||||
"""
|
||||
original_read = file_tools.read_file
|
||||
|
||||
def smart_read_file(file_name: str = "", encoding: str = "utf-8", **kwargs) -> str:
|
||||
"""Reads the contents of the file `file_name` and returns the contents if successful."""
|
||||
# LLMs often call read_file(path=...) instead of read_file(file_name=...)
|
||||
if not file_name:
|
||||
file_name = kwargs.get("path", "")
|
||||
if not file_name:
|
||||
return "Error: no file_name or path provided."
|
||||
# Resolve the path the same way FileTools does
|
||||
_safe, resolved = file_tools.check_escape(file_name)
|
||||
if _safe and resolved.is_dir():
|
||||
entries = sorted(p.name for p in resolved.iterdir() if not p.name.startswith("."))
|
||||
listing = "\n".join(f" - {e}" for e in entries) if entries else " (empty directory)"
|
||||
return (
|
||||
f"'{file_name}' is a directory, not a file. "
|
||||
f"Files inside:\n{listing}\n\n"
|
||||
"Please call read_file with one of the files listed above."
|
||||
)
|
||||
return original_read(file_name, encoding=encoding)
|
||||
|
||||
# Preserve the original docstring for Agno tool schema generation
|
||||
smart_read_file.__doc__ = original_read.__doc__
|
||||
return smart_read_file
|
||||
|
||||
|
||||
def create_research_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the research agent (Echo).
|
||||
|
||||
Includes: file reading
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="research")
|
||||
|
||||
# File reading
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_code_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the code agent (Forge).
|
||||
|
||||
Includes: shell commands, python execution, file read/write, Aider AI assist
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="code")
|
||||
|
||||
# Shell commands (sandboxed)
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# Python execution
|
||||
python_tools = PythonTools()
|
||||
toolkit.register(python_tools.run_python_code, name="python")
|
||||
|
||||
# File operations
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
# Aider AI coding assistant (local with Ollama)
|
||||
aider_tool = create_aider_tool(base_path)
|
||||
toolkit.register(aider_tool.run_aider, name="aider")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_aider_tool(base_path: Path):
|
||||
"""Create an Aider tool for AI-assisted coding."""
|
||||
import subprocess
|
||||
|
||||
class AiderTool:
|
||||
"""Tool that calls Aider (local AI coding assistant) for code generation."""
|
||||
|
||||
def __init__(self, base_dir: Path):
|
||||
self.base_dir = base_dir
|
||||
|
||||
def run_aider(self, prompt: str, model: str = "qwen3:30b") -> str:
|
||||
"""Run Aider to generate code changes.
|
||||
|
||||
Args:
|
||||
prompt: What you want Aider to do (e.g., "add a fibonacci function")
|
||||
model: Ollama model to use (default: qwen3:30b)
|
||||
|
||||
Returns:
|
||||
Aider's response with the code changes made
|
||||
"""
|
||||
try:
|
||||
# Run aider with the prompt
|
||||
result = subprocess.run(
|
||||
[
|
||||
"aider",
|
||||
"--no-git",
|
||||
"--model",
|
||||
f"ollama/{model}",
|
||||
"--quiet",
|
||||
prompt,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120,
|
||||
cwd=str(self.base_dir),
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
return result.stdout if result.stdout else "Code changes applied successfully"
|
||||
else:
|
||||
return f"Aider error: {result.stderr}"
|
||||
except FileNotFoundError:
|
||||
return "Error: Aider not installed. Run: pip install aider"
|
||||
except subprocess.TimeoutExpired:
|
||||
return "Error: Aider timed out after 120 seconds"
|
||||
except (OSError, subprocess.SubprocessError) as e:
|
||||
return f"Error running Aider: {str(e)}"
|
||||
|
||||
return AiderTool(base_path)
|
||||
|
||||
|
||||
def create_data_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the data agent (Seer).
|
||||
|
||||
Includes: python execution, file reading, web search for data sources
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="data")
|
||||
|
||||
# Python execution for analysis
|
||||
python_tools = PythonTools()
|
||||
toolkit.register(python_tools.run_python_code, name="python")
|
||||
|
||||
# File reading
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_writing_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the writing agent (Quill).
|
||||
|
||||
Includes: file read/write
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="writing")
|
||||
|
||||
# File operations
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_security_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the security agent (Mace).
|
||||
|
||||
Includes: shell commands (for scanning), file read
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="security")
|
||||
|
||||
# Shell for running security scans
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# File reading for logs/configs
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_devops_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the DevOps agent (Helm).
|
||||
|
||||
Includes: shell commands, file read/write
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="devops")
|
||||
|
||||
# Shell for deployment commands
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# File operations for config management
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def consult_grok(query: str) -> str:
|
||||
"""Consult Grok (xAI) for frontier reasoning on complex questions.
|
||||
|
||||
Use this tool when a question requires advanced reasoning, real-time
|
||||
knowledge, or capabilities beyond the local model. Grok is a premium
|
||||
cloud backend — use sparingly and only for high-complexity queries.
|
||||
|
||||
Args:
|
||||
query: The question or reasoning task to send to Grok.
|
||||
|
||||
Returns:
|
||||
Grok's response text, or an error/status message.
|
||||
"""
|
||||
from config import settings
|
||||
from timmy.backends import get_grok_backend, grok_available
|
||||
|
||||
if not grok_available():
|
||||
return (
|
||||
"Grok is not available. Enable with GROK_ENABLED=true "
|
||||
"and set XAI_API_KEY in your .env file."
|
||||
)
|
||||
|
||||
backend = get_grok_backend()
|
||||
|
||||
# Log to Spark if available
|
||||
try:
|
||||
from spark.engine import spark_engine
|
||||
|
||||
spark_engine.on_tool_executed(
|
||||
agent_id="default",
|
||||
tool_name="consult_grok",
|
||||
success=True,
|
||||
)
|
||||
except (ImportError, AttributeError) as exc:
|
||||
logger.warning("Tool execution failed (consult_grok logging): %s", exc)
|
||||
|
||||
# Generate Lightning invoice for monetization (unless free mode)
|
||||
invoice_info = ""
|
||||
if not settings.grok_free:
|
||||
try:
|
||||
from lightning.factory import get_backend as get_ln_backend
|
||||
|
||||
ln = get_ln_backend()
|
||||
sats = min(settings.grok_max_sats_per_query, settings.grok_sats_hard_cap)
|
||||
inv = ln.create_invoice(sats, f"Grok query: {query[:_INVOICE_MEMO_MAX_LEN]}")
|
||||
invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
|
||||
except (ImportError, OSError, ValueError) as exc:
|
||||
logger.error("Lightning invoice creation failed: %s", exc)
|
||||
return "Error: Failed to create Lightning invoice. Please check logs."
|
||||
|
||||
result = backend.run(query)
|
||||
|
||||
response = result.content
|
||||
if invoice_info:
|
||||
response += invoice_info
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def web_fetch(url: str, max_tokens: int = 4000) -> str:
|
||||
"""Fetch a web page and return its main text content.
|
||||
|
||||
Downloads the URL, extracts readable text using trafilatura, and
|
||||
truncates to a token budget. Use this to read full articles, docs,
|
||||
or blog posts that web_search only returns snippets for.
|
||||
|
||||
Args:
|
||||
url: The URL to fetch (must start with http:// or https://).
|
||||
max_tokens: Maximum approximate token budget (default 4000).
|
||||
Text is truncated to max_tokens * 4 characters.
|
||||
|
||||
Returns:
|
||||
Extracted text content, or an error message on failure.
|
||||
"""
|
||||
if not url or not url.startswith(("http://", "https://")):
|
||||
return f"Error: invalid URL — must start with http:// or https://: {url!r}"
|
||||
|
||||
try:
|
||||
import requests as _requests
|
||||
except ImportError:
|
||||
return "Error: 'requests' package is not installed. Install with: pip install requests"
|
||||
|
||||
try:
|
||||
import trafilatura
|
||||
except ImportError:
|
||||
return (
|
||||
"Error: 'trafilatura' package is not installed. Install with: pip install trafilatura"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = _requests.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": "TimmyResearchBot/1.0"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except _requests.exceptions.Timeout:
|
||||
return f"Error: request timed out after 15 seconds for {url}"
|
||||
except _requests.exceptions.HTTPError as exc:
|
||||
return f"Error: HTTP {exc.response.status_code} for {url}"
|
||||
except _requests.exceptions.RequestException as exc:
|
||||
return f"Error: failed to fetch {url} — {exc}"
|
||||
|
||||
text = trafilatura.extract(resp.text, include_tables=True, include_links=True)
|
||||
if not text:
|
||||
return f"Error: could not extract readable content from {url}"
|
||||
|
||||
char_budget = max_tokens * 4
|
||||
if len(text) > char_budget:
|
||||
text = text[:char_budget] + f"\n\n[…truncated to ~{max_tokens} tokens]"
|
||||
|
||||
return text
|
||||
# ---------------------------------------------------------------------------
|
||||
# Internal _register_* helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _register_web_fetch_tool(toolkit: Toolkit) -> None:
|
||||
@@ -717,6 +233,11 @@ def _register_thinking_tools(toolkit: Toolkit) -> None:
|
||||
raise
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full toolkit factories
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def create_full_toolkit(base_dir: str | Path | None = None):
|
||||
"""Create a full toolkit with all available tools (for the orchestrator).
|
||||
|
||||
@@ -727,6 +248,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
|
||||
# Return None when tools aren't available (tests)
|
||||
return None
|
||||
|
||||
from config import settings
|
||||
from timmy.tool_safety import DANGEROUS_TOOLS
|
||||
|
||||
toolkit = Toolkit(name="full")
|
||||
@@ -808,19 +330,9 @@ def create_experiment_tools(base_dir: str | Path | None = None):
|
||||
return toolkit
|
||||
|
||||
|
||||
# Mapping of agent IDs to their toolkits
|
||||
AGENT_TOOLKITS: dict[str, Callable[[], Toolkit]] = {
|
||||
"echo": create_research_tools,
|
||||
"mace": create_security_tools,
|
||||
"helm": create_devops_tools,
|
||||
"seer": create_data_tools,
|
||||
"forge": create_code_tools,
|
||||
"quill": create_writing_tools,
|
||||
"lab": create_experiment_tools,
|
||||
"pixel": lambda base_dir=None: _create_stub_toolkit("pixel"),
|
||||
"lyra": lambda base_dir=None: _create_stub_toolkit("lyra"),
|
||||
"reel": lambda base_dir=None: _create_stub_toolkit("reel"),
|
||||
}
|
||||
# ---------------------------------------------------------------------------
|
||||
# Agent toolkit registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _create_stub_toolkit(name: str):
|
||||
@@ -836,7 +348,22 @@ def _create_stub_toolkit(name: str):
|
||||
return toolkit
|
||||
|
||||
|
||||
def get_tools_for_agent(agent_id: str, base_dir: str | Path | None = None) -> Toolkit | None:
|
||||
# Mapping of agent IDs to their toolkits
|
||||
AGENT_TOOLKITS: dict[str, Callable[[], Toolkit]] = {
|
||||
"echo": create_research_tools,
|
||||
"mace": create_security_tools,
|
||||
"helm": create_devops_tools,
|
||||
"seer": create_data_tools,
|
||||
"forge": create_code_tools,
|
||||
"quill": create_writing_tools,
|
||||
"lab": create_experiment_tools,
|
||||
"pixel": lambda base_dir=None: _create_stub_toolkit("pixel"),
|
||||
"lyra": lambda base_dir=None: _create_stub_toolkit("lyra"),
|
||||
"reel": lambda base_dir=None: _create_stub_toolkit("reel"),
|
||||
}
|
||||
|
||||
|
||||
def get_tools_for_agent(agent_id: str, base_dir: str | Path | None = None) -> "Toolkit | None":
|
||||
"""Get the appropriate toolkit for an agent.
|
||||
|
||||
Args:
|
||||
@@ -852,11 +379,16 @@ def get_tools_for_agent(agent_id: str, base_dir: str | Path | None = None) -> To
|
||||
return None
|
||||
|
||||
|
||||
# Backward-compat alias
|
||||
# Backward-compat aliases
|
||||
get_tools_for_persona = get_tools_for_agent
|
||||
PERSONA_TOOLKITS = AGENT_TOOLKITS
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tool catalog
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _core_tool_catalog() -> dict:
|
||||
"""Return core file and execution tools catalog entries."""
|
||||
return {
|
||||
121
src/timmy/tools/file_tools.py
Normal file
121
src/timmy/tools/file_tools.py
Normal file
@@ -0,0 +1,121 @@
|
||||
"""File operation tools and agent toolkit factories for file-heavy agents.
|
||||
|
||||
Provides:
|
||||
- Smart read_file wrapper (auto-lists directories)
|
||||
- Toolkit factories for Echo (research), Quill (writing), Seer (data)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from pathlib import Path
|
||||
|
||||
from timmy.tools._base import (
|
||||
_AGNO_TOOLS_AVAILABLE,
|
||||
_ImportError,
|
||||
FileTools,
|
||||
PythonTools,
|
||||
Toolkit,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _make_smart_read_file(file_tools: "FileTools") -> Callable:
|
||||
"""Wrap FileTools.read_file so directories auto-list their contents.
|
||||
|
||||
When the user (or the LLM) passes a directory path to read_file,
|
||||
the raw Agno implementation throws an IsADirectoryError. This
|
||||
wrapper detects that case, lists the directory entries, and returns
|
||||
a helpful message so the model can pick the right file on its own.
|
||||
"""
|
||||
original_read = file_tools.read_file
|
||||
|
||||
def smart_read_file(file_name: str = "", encoding: str = "utf-8", **kwargs) -> str:
|
||||
"""Reads the contents of the file `file_name` and returns the contents if successful."""
|
||||
# LLMs often call read_file(path=...) instead of read_file(file_name=...)
|
||||
if not file_name:
|
||||
file_name = kwargs.get("path", "")
|
||||
if not file_name:
|
||||
return "Error: no file_name or path provided."
|
||||
# Resolve the path the same way FileTools does
|
||||
_safe, resolved = file_tools.check_escape(file_name)
|
||||
if _safe and resolved.is_dir():
|
||||
entries = sorted(p.name for p in resolved.iterdir() if not p.name.startswith("."))
|
||||
listing = "\n".join(f" - {e}" for e in entries) if entries else " (empty directory)"
|
||||
return (
|
||||
f"'{file_name}' is a directory, not a file. "
|
||||
f"Files inside:\n{listing}\n\n"
|
||||
"Please call read_file with one of the files listed above."
|
||||
)
|
||||
return original_read(file_name, encoding=encoding)
|
||||
|
||||
# Preserve the original docstring for Agno tool schema generation
|
||||
smart_read_file.__doc__ = original_read.__doc__
|
||||
return smart_read_file
|
||||
|
||||
|
||||
def create_research_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the research agent (Echo).
|
||||
|
||||
Includes: file reading
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="research")
|
||||
|
||||
# File reading
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_writing_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the writing agent (Quill).
|
||||
|
||||
Includes: file read/write
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="writing")
|
||||
|
||||
# File operations
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_data_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the data agent (Seer).
|
||||
|
||||
Includes: python execution, file reading, web search for data sources
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="data")
|
||||
|
||||
# Python execution for analysis
|
||||
python_tools = PythonTools()
|
||||
toolkit.register(python_tools.run_python_code, name="python")
|
||||
|
||||
# File reading
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
357
src/timmy/tools/system_tools.py
Normal file
357
src/timmy/tools/system_tools.py
Normal file
@@ -0,0 +1,357 @@
|
||||
"""System, calculation, and AI consultation tools for Timmy agents.
|
||||
|
||||
Provides:
|
||||
- Safe AST-based calculator
|
||||
- consult_grok (xAI frontier reasoning)
|
||||
- web_fetch (content extraction)
|
||||
- Toolkit factories for Forge (code), Mace (security), Helm (devops)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import logging
|
||||
import math
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
from timmy.tools._base import (
|
||||
_AGNO_TOOLS_AVAILABLE,
|
||||
_ImportError,
|
||||
FileTools,
|
||||
PythonTools,
|
||||
ShellTools,
|
||||
Toolkit,
|
||||
)
|
||||
from timmy.tools.file_tools import _make_smart_read_file
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Max characters of user query included in Lightning invoice memo
|
||||
_INVOICE_MEMO_MAX_LEN = 50
|
||||
|
||||
|
||||
def _safe_eval(node, allowed_names: dict):
|
||||
"""Walk an AST and evaluate only safe numeric operations."""
|
||||
if isinstance(node, ast.Expression):
|
||||
return _safe_eval(node.body, allowed_names)
|
||||
if isinstance(node, ast.Constant):
|
||||
if isinstance(node.value, (int, float, complex)):
|
||||
return node.value
|
||||
raise ValueError(f"Unsupported constant: {node.value!r}")
|
||||
if isinstance(node, ast.UnaryOp):
|
||||
operand = _safe_eval(node.operand, allowed_names)
|
||||
if isinstance(node.op, ast.UAdd):
|
||||
return +operand
|
||||
if isinstance(node.op, ast.USub):
|
||||
return -operand
|
||||
raise ValueError(f"Unsupported unary op: {type(node.op).__name__}")
|
||||
if isinstance(node, ast.BinOp):
|
||||
left = _safe_eval(node.left, allowed_names)
|
||||
right = _safe_eval(node.right, allowed_names)
|
||||
ops = {
|
||||
ast.Add: lambda a, b: a + b,
|
||||
ast.Sub: lambda a, b: a - b,
|
||||
ast.Mult: lambda a, b: a * b,
|
||||
ast.Div: lambda a, b: a / b,
|
||||
ast.FloorDiv: lambda a, b: a // b,
|
||||
ast.Mod: lambda a, b: a % b,
|
||||
ast.Pow: lambda a, b: a**b,
|
||||
}
|
||||
op_fn = ops.get(type(node.op))
|
||||
if op_fn is None:
|
||||
raise ValueError(f"Unsupported binary op: {type(node.op).__name__}")
|
||||
return op_fn(left, right)
|
||||
if isinstance(node, ast.Name):
|
||||
if node.id in allowed_names:
|
||||
return allowed_names[node.id]
|
||||
raise ValueError(f"Unknown name: {node.id!r}")
|
||||
if isinstance(node, ast.Attribute):
|
||||
value = _safe_eval(node.value, allowed_names)
|
||||
# Only allow attribute access on the math module
|
||||
if value is math:
|
||||
attr = getattr(math, node.attr, None)
|
||||
if attr is not None:
|
||||
return attr
|
||||
raise ValueError(f"Attribute access not allowed: .{node.attr}")
|
||||
if isinstance(node, ast.Call):
|
||||
func = _safe_eval(node.func, allowed_names)
|
||||
if not callable(func):
|
||||
raise ValueError(f"Not callable: {func!r}")
|
||||
args = [_safe_eval(a, allowed_names) for a in node.args]
|
||||
kwargs = {kw.arg: _safe_eval(kw.value, allowed_names) for kw in node.keywords}
|
||||
return func(*args, **kwargs)
|
||||
raise ValueError(f"Unsupported syntax: {type(node).__name__}")
|
||||
|
||||
|
||||
def calculator(expression: str) -> str:
|
||||
"""Evaluate a mathematical expression and return the exact result.
|
||||
|
||||
Use this tool for ANY arithmetic: multiplication, division, square roots,
|
||||
exponents, percentages, logarithms, trigonometry, etc.
|
||||
|
||||
Args:
|
||||
expression: A valid Python math expression, e.g. '347 * 829',
|
||||
'math.sqrt(17161)', '2**10', 'math.log(100, 10)'.
|
||||
|
||||
Returns:
|
||||
The exact result as a string.
|
||||
"""
|
||||
allowed_names = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
|
||||
allowed_names["math"] = math
|
||||
allowed_names["abs"] = abs
|
||||
allowed_names["round"] = round
|
||||
allowed_names["min"] = min
|
||||
allowed_names["max"] = max
|
||||
try:
|
||||
tree = ast.parse(expression, mode="eval")
|
||||
result = _safe_eval(tree, allowed_names)
|
||||
return str(result)
|
||||
except Exception as e: # broad catch intentional: arbitrary code execution
|
||||
return f"Error evaluating '{expression}': {e}"
|
||||
|
||||
|
||||
def consult_grok(query: str) -> str:
|
||||
"""Consult Grok (xAI) for frontier reasoning on complex questions.
|
||||
|
||||
Use this tool when a question requires advanced reasoning, real-time
|
||||
knowledge, or capabilities beyond the local model. Grok is a premium
|
||||
cloud backend — use sparingly and only for high-complexity queries.
|
||||
|
||||
Args:
|
||||
query: The question or reasoning task to send to Grok.
|
||||
|
||||
Returns:
|
||||
Grok's response text, or an error/status message.
|
||||
"""
|
||||
from config import settings
|
||||
from timmy.backends import get_grok_backend, grok_available
|
||||
|
||||
if not grok_available():
|
||||
return (
|
||||
"Grok is not available. Enable with GROK_ENABLED=true "
|
||||
"and set XAI_API_KEY in your .env file."
|
||||
)
|
||||
|
||||
backend = get_grok_backend()
|
||||
|
||||
# Log to Spark if available
|
||||
try:
|
||||
from spark.engine import spark_engine
|
||||
|
||||
spark_engine.on_tool_executed(
|
||||
agent_id="default",
|
||||
tool_name="consult_grok",
|
||||
success=True,
|
||||
)
|
||||
except (ImportError, AttributeError) as exc:
|
||||
logger.warning("Tool execution failed (consult_grok logging): %s", exc)
|
||||
|
||||
# Generate Lightning invoice for monetization (unless free mode)
|
||||
invoice_info = ""
|
||||
if not settings.grok_free:
|
||||
try:
|
||||
from lightning.factory import get_backend as get_ln_backend
|
||||
|
||||
ln = get_ln_backend()
|
||||
sats = min(settings.grok_max_sats_per_query, settings.grok_sats_hard_cap)
|
||||
inv = ln.create_invoice(sats, f"Grok query: {query[:_INVOICE_MEMO_MAX_LEN]}")
|
||||
invoice_info = f"\n[Lightning invoice: {sats} sats — {inv.payment_request[:40]}...]"
|
||||
except (ImportError, OSError, ValueError) as exc:
|
||||
logger.error("Lightning invoice creation failed: %s", exc)
|
||||
return "Error: Failed to create Lightning invoice. Please check logs."
|
||||
|
||||
result = backend.run(query)
|
||||
|
||||
response = result.content
|
||||
if invoice_info:
|
||||
response += invoice_info
|
||||
|
||||
return response
|
||||
|
||||
|
||||
def web_fetch(url: str, max_tokens: int = 4000) -> str:
|
||||
"""Fetch a web page and return its main text content.
|
||||
|
||||
Downloads the URL, extracts readable text using trafilatura, and
|
||||
truncates to a token budget. Use this to read full articles, docs,
|
||||
or blog posts that web_search only returns snippets for.
|
||||
|
||||
Args:
|
||||
url: The URL to fetch (must start with http:// or https://).
|
||||
max_tokens: Maximum approximate token budget (default 4000).
|
||||
Text is truncated to max_tokens * 4 characters.
|
||||
|
||||
Returns:
|
||||
Extracted text content, or an error message on failure.
|
||||
"""
|
||||
if not url or not url.startswith(("http://", "https://")):
|
||||
return f"Error: invalid URL — must start with http:// or https://: {url!r}"
|
||||
|
||||
try:
|
||||
import requests as _requests
|
||||
except ImportError:
|
||||
return "Error: 'requests' package is not installed. Install with: pip install requests"
|
||||
|
||||
try:
|
||||
import trafilatura
|
||||
except ImportError:
|
||||
return (
|
||||
"Error: 'trafilatura' package is not installed. Install with: pip install trafilatura"
|
||||
)
|
||||
|
||||
try:
|
||||
resp = _requests.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": "TimmyResearchBot/1.0"},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
except _requests.exceptions.Timeout:
|
||||
return f"Error: request timed out after 15 seconds for {url}"
|
||||
except _requests.exceptions.HTTPError as exc:
|
||||
return f"Error: HTTP {exc.response.status_code} for {url}"
|
||||
except _requests.exceptions.RequestException as exc:
|
||||
return f"Error: failed to fetch {url} — {exc}"
|
||||
|
||||
text = trafilatura.extract(resp.text, include_tables=True, include_links=True)
|
||||
if not text:
|
||||
return f"Error: could not extract readable content from {url}"
|
||||
|
||||
char_budget = max_tokens * 4
|
||||
if len(text) > char_budget:
|
||||
text = text[:char_budget] + f"\n\n[…truncated to ~{max_tokens} tokens]"
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def create_aider_tool(base_path: Path):
|
||||
"""Create an Aider tool for AI-assisted coding."""
|
||||
|
||||
class AiderTool:
|
||||
"""Tool that calls Aider (local AI coding assistant) for code generation."""
|
||||
|
||||
def __init__(self, base_dir: Path):
|
||||
self.base_dir = base_dir
|
||||
|
||||
def run_aider(self, prompt: str, model: str = "qwen3:30b") -> str:
|
||||
"""Run Aider to generate code changes.
|
||||
|
||||
Args:
|
||||
prompt: What you want Aider to do (e.g., "add a fibonacci function")
|
||||
model: Ollama model to use (default: qwen3:30b)
|
||||
|
||||
Returns:
|
||||
Aider's response with the code changes made
|
||||
"""
|
||||
try:
|
||||
# Run aider with the prompt
|
||||
result = subprocess.run(
|
||||
[
|
||||
"aider",
|
||||
"--no-git",
|
||||
"--model",
|
||||
f"ollama/{model}",
|
||||
"--quiet",
|
||||
prompt,
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120,
|
||||
cwd=str(self.base_dir),
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
return result.stdout if result.stdout else "Code changes applied successfully"
|
||||
else:
|
||||
return f"Aider error: {result.stderr}"
|
||||
except FileNotFoundError:
|
||||
return "Error: Aider not installed. Run: pip install aider"
|
||||
except subprocess.TimeoutExpired:
|
||||
return "Error: Aider timed out after 120 seconds"
|
||||
except (OSError, subprocess.SubprocessError) as e:
|
||||
return f"Error running Aider: {str(e)}"
|
||||
|
||||
return AiderTool(base_path)
|
||||
|
||||
|
||||
def create_code_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the code agent (Forge).
|
||||
|
||||
Includes: shell commands, python execution, file read/write, Aider AI assist
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="code")
|
||||
|
||||
# Shell commands (sandboxed)
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# Python execution
|
||||
python_tools = PythonTools()
|
||||
toolkit.register(python_tools.run_python_code, name="python")
|
||||
|
||||
# File operations
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
# Aider AI coding assistant (local with Ollama)
|
||||
aider_tool = create_aider_tool(base_path)
|
||||
toolkit.register(aider_tool.run_aider, name="aider")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_security_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the security agent (Mace).
|
||||
|
||||
Includes: shell commands (for scanning), file read
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="security")
|
||||
|
||||
# Shell for running security scans
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# File reading for logs/configs
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
|
||||
|
||||
def create_devops_tools(base_dir: str | Path | None = None):
|
||||
"""Create tools for the DevOps agent (Helm).
|
||||
|
||||
Includes: shell commands, file read/write
|
||||
"""
|
||||
if not _AGNO_TOOLS_AVAILABLE:
|
||||
raise ImportError(f"Agno tools not available: {_ImportError}")
|
||||
toolkit = Toolkit(name="devops")
|
||||
|
||||
# Shell for deployment commands
|
||||
shell_tools = ShellTools()
|
||||
toolkit.register(shell_tools.run_shell_command, name="shell")
|
||||
|
||||
# File operations for config management
|
||||
from config import settings
|
||||
|
||||
base_path = Path(base_dir) if base_dir else Path(settings.repo_root)
|
||||
file_tools = FileTools(base_dir=base_path)
|
||||
toolkit.register(_make_smart_read_file(file_tools), name="read_file")
|
||||
toolkit.register(file_tools.save_file, name="write_file")
|
||||
toolkit.register(file_tools.list_files, name="list_files")
|
||||
|
||||
return toolkit
|
||||
444
tests/timmy/test_session_report.py
Normal file
444
tests/timmy/test_session_report.py
Normal file
@@ -0,0 +1,444 @@
|
||||
"""Tests for timmy.sovereignty.session_report.
|
||||
|
||||
Refs: #957 (Session Sovereignty Report Generator)
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import time
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
from timmy.sovereignty.session_report import (
|
||||
_format_duration,
|
||||
_gather_session_data,
|
||||
_gather_sovereignty_data,
|
||||
_render_markdown,
|
||||
commit_report,
|
||||
generate_and_commit_report,
|
||||
generate_report,
|
||||
mark_session_start,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _format_duration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFormatDuration:
|
||||
def test_seconds_only(self):
|
||||
assert _format_duration(45) == "45s"
|
||||
|
||||
def test_minutes_and_seconds(self):
|
||||
assert _format_duration(125) == "2m 5s"
|
||||
|
||||
def test_hours_minutes_seconds(self):
|
||||
assert _format_duration(3661) == "1h 1m 1s"
|
||||
|
||||
def test_zero(self):
|
||||
assert _format_duration(0) == "0s"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# mark_session_start + generate_report (smoke)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMarkSessionStart:
|
||||
def test_sets_session_start(self):
|
||||
import timmy.sovereignty.session_report as sr
|
||||
|
||||
sr._SESSION_START = None
|
||||
mark_session_start()
|
||||
assert sr._SESSION_START is not None
|
||||
assert sr._SESSION_START.tzinfo == UTC
|
||||
|
||||
def test_idempotent_overwrite(self):
|
||||
import timmy.sovereignty.session_report as sr
|
||||
|
||||
mark_session_start()
|
||||
first = sr._SESSION_START
|
||||
time.sleep(0.01)
|
||||
mark_session_start()
|
||||
second = sr._SESSION_START
|
||||
assert second >= first
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _gather_session_data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGatherSessionData:
|
||||
def test_returns_defaults_when_no_file(self, tmp_path):
|
||||
mock_logger = MagicMock()
|
||||
mock_logger.flush.return_value = None
|
||||
mock_logger.session_file = tmp_path / "nonexistent.jsonl"
|
||||
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_session_logger",
|
||||
return_value=mock_logger,
|
||||
):
|
||||
data = _gather_session_data()
|
||||
|
||||
assert data["user_messages"] == 0
|
||||
assert data["timmy_messages"] == 0
|
||||
assert data["tool_calls"] == 0
|
||||
assert data["errors"] == 0
|
||||
assert data["tool_call_breakdown"] == {}
|
||||
|
||||
def test_counts_entries_correctly(self, tmp_path):
|
||||
session_file = tmp_path / "session_2026-03-23.jsonl"
|
||||
entries = [
|
||||
{"type": "message", "role": "user", "content": "hello"},
|
||||
{"type": "message", "role": "timmy", "content": "hi"},
|
||||
{"type": "message", "role": "user", "content": "test"},
|
||||
{"type": "tool_call", "tool": "memory_search", "args": {}, "result": "found"},
|
||||
{"type": "tool_call", "tool": "memory_search", "args": {}, "result": "nope"},
|
||||
{"type": "tool_call", "tool": "shell", "args": {}, "result": "ok"},
|
||||
{"type": "error", "error": "boom"},
|
||||
]
|
||||
with open(session_file, "w") as f:
|
||||
for e in entries:
|
||||
f.write(json.dumps(e) + "\n")
|
||||
|
||||
mock_logger = MagicMock()
|
||||
mock_logger.flush.return_value = None
|
||||
mock_logger.session_file = session_file
|
||||
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_session_logger",
|
||||
return_value=mock_logger,
|
||||
):
|
||||
data = _gather_session_data()
|
||||
|
||||
assert data["user_messages"] == 2
|
||||
assert data["timmy_messages"] == 1
|
||||
assert data["tool_calls"] == 3
|
||||
assert data["errors"] == 1
|
||||
assert data["tool_call_breakdown"]["memory_search"] == 2
|
||||
assert data["tool_call_breakdown"]["shell"] == 1
|
||||
|
||||
def test_graceful_on_import_error(self):
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_session_logger",
|
||||
side_effect=ImportError("no session_logger"),
|
||||
):
|
||||
data = _gather_session_data()
|
||||
|
||||
assert data["tool_calls"] == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _gather_sovereignty_data
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGatherSovereigntyData:
|
||||
def test_returns_empty_on_import_error(self):
|
||||
with patch.dict("sys.modules", {"infrastructure.sovereignty_metrics": None}):
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_sovereignty_store",
|
||||
side_effect=ImportError("no store"),
|
||||
):
|
||||
data = _gather_sovereignty_data()
|
||||
|
||||
assert data["metrics"] == {}
|
||||
assert data["deltas"] == {}
|
||||
assert data["previous_session"] == {}
|
||||
|
||||
def test_populates_deltas_from_history(self):
|
||||
mock_store = MagicMock()
|
||||
mock_store.get_summary.return_value = {
|
||||
"cache_hit_rate": {"current": 0.5, "phase": "week1"},
|
||||
}
|
||||
# get_latest returns newest-first
|
||||
mock_store.get_latest.return_value = [
|
||||
{"value": 0.5},
|
||||
{"value": 0.3},
|
||||
{"value": 0.1},
|
||||
]
|
||||
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_sovereignty_store",
|
||||
return_value=mock_store,
|
||||
):
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.GRADUATION_TARGETS",
|
||||
{"cache_hit_rate": {"graduation": 0.9}},
|
||||
):
|
||||
data = _gather_sovereignty_data()
|
||||
|
||||
delta = data["deltas"].get("cache_hit_rate")
|
||||
assert delta is not None
|
||||
assert delta["start"] == 0.1 # oldest in window
|
||||
assert delta["end"] == 0.5 # most recent
|
||||
assert data["previous_session"]["cache_hit_rate"] == 0.3
|
||||
|
||||
def test_single_data_point_no_delta(self):
|
||||
mock_store = MagicMock()
|
||||
mock_store.get_summary.return_value = {}
|
||||
mock_store.get_latest.return_value = [{"value": 0.4}]
|
||||
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.get_sovereignty_store",
|
||||
return_value=mock_store,
|
||||
):
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.GRADUATION_TARGETS",
|
||||
{"api_cost": {"graduation": 0.01}},
|
||||
):
|
||||
data = _gather_sovereignty_data()
|
||||
|
||||
delta = data["deltas"]["api_cost"]
|
||||
assert delta["start"] == 0.4
|
||||
assert delta["end"] == 0.4
|
||||
assert data["previous_session"]["api_cost"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate_report (integration — smoke test)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGenerateReport:
|
||||
def _minimal_session_data(self):
|
||||
return {
|
||||
"user_messages": 3,
|
||||
"timmy_messages": 3,
|
||||
"tool_calls": 2,
|
||||
"errors": 0,
|
||||
"tool_call_breakdown": {"memory_search": 2},
|
||||
}
|
||||
|
||||
def _minimal_sov_data(self):
|
||||
return {
|
||||
"metrics": {
|
||||
"cache_hit_rate": {"current": 0.45, "phase": "week1"},
|
||||
"api_cost": {"current": 0.12, "phase": "pre-start"},
|
||||
},
|
||||
"deltas": {
|
||||
"cache_hit_rate": {"start": 0.40, "end": 0.45},
|
||||
"api_cost": {"start": 0.10, "end": 0.12},
|
||||
},
|
||||
"previous_session": {
|
||||
"cache_hit_rate": 0.40,
|
||||
"api_cost": 0.10,
|
||||
},
|
||||
}
|
||||
|
||||
def test_smoke_produces_markdown(self):
|
||||
with (
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_session_data",
|
||||
return_value=self._minimal_session_data(),
|
||||
),
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_sovereignty_data",
|
||||
return_value=self._minimal_sov_data(),
|
||||
),
|
||||
):
|
||||
report = generate_report("test-session")
|
||||
|
||||
assert "# Sovereignty Session Report" in report
|
||||
assert "test-session" in report
|
||||
assert "## Session Activity" in report
|
||||
assert "## Sovereignty Scorecard" in report
|
||||
assert "## Cost Breakdown" in report
|
||||
assert "## Trend vs Previous Session" in report
|
||||
|
||||
def test_report_contains_session_stats(self):
|
||||
with (
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_session_data",
|
||||
return_value=self._minimal_session_data(),
|
||||
),
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_sovereignty_data",
|
||||
return_value=self._minimal_sov_data(),
|
||||
),
|
||||
):
|
||||
report = generate_report()
|
||||
|
||||
assert "| User messages | 3 |" in report
|
||||
assert "memory_search" in report
|
||||
|
||||
def test_report_no_previous_session(self):
|
||||
sov = self._minimal_sov_data()
|
||||
sov["previous_session"] = {"cache_hit_rate": None, "api_cost": None}
|
||||
|
||||
with (
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_session_data",
|
||||
return_value=self._minimal_session_data(),
|
||||
),
|
||||
patch(
|
||||
"timmy.sovereignty.session_report._gather_sovereignty_data",
|
||||
return_value=sov,
|
||||
),
|
||||
):
|
||||
report = generate_report()
|
||||
|
||||
assert "No previous session data" in report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# commit_report
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCommitReport:
|
||||
def test_returns_false_when_gitea_disabled(self):
|
||||
with patch("timmy.sovereignty.session_report.settings") as mock_settings:
|
||||
mock_settings.gitea_enabled = False
|
||||
result = commit_report("# test", "dashboard")
|
||||
|
||||
assert result is False
|
||||
|
||||
def test_returns_false_when_no_token(self):
|
||||
with patch("timmy.sovereignty.session_report.settings") as mock_settings:
|
||||
mock_settings.gitea_enabled = True
|
||||
mock_settings.gitea_token = ""
|
||||
result = commit_report("# test", "dashboard")
|
||||
|
||||
assert result is False
|
||||
|
||||
def test_creates_file_via_put(self):
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 201
|
||||
mock_response.raise_for_status.return_value = None
|
||||
|
||||
mock_check = MagicMock()
|
||||
mock_check.status_code = 404 # file does not exist yet
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
mock_client.get.return_value = mock_check
|
||||
mock_client.put.return_value = mock_response
|
||||
|
||||
with (
|
||||
patch("timmy.sovereignty.session_report.settings") as mock_settings,
|
||||
patch("timmy.sovereignty.session_report.httpx.Client", return_value=mock_client),
|
||||
):
|
||||
mock_settings.gitea_enabled = True
|
||||
mock_settings.gitea_token = "fake-token"
|
||||
mock_settings.gitea_url = "http://localhost:3000"
|
||||
mock_settings.gitea_repo = "owner/repo"
|
||||
|
||||
result = commit_report("# report content", "dashboard")
|
||||
|
||||
assert result is True
|
||||
mock_client.put.assert_called_once()
|
||||
call_kwargs = mock_client.put.call_args
|
||||
payload = call_kwargs.kwargs.get("json", call_kwargs.args[1] if len(call_kwargs.args) > 1 else {})
|
||||
decoded = base64.b64decode(payload["content"]).decode()
|
||||
assert "# report content" in decoded
|
||||
|
||||
def test_updates_existing_file_with_sha(self):
|
||||
mock_check = MagicMock()
|
||||
mock_check.status_code = 200
|
||||
mock_check.json.return_value = {"sha": "abc123"}
|
||||
|
||||
mock_response = MagicMock()
|
||||
mock_response.raise_for_status.return_value = None
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
mock_client.get.return_value = mock_check
|
||||
mock_client.put.return_value = mock_response
|
||||
|
||||
with (
|
||||
patch("timmy.sovereignty.session_report.settings") as mock_settings,
|
||||
patch("timmy.sovereignty.session_report.httpx.Client", return_value=mock_client),
|
||||
):
|
||||
mock_settings.gitea_enabled = True
|
||||
mock_settings.gitea_token = "fake-token"
|
||||
mock_settings.gitea_url = "http://localhost:3000"
|
||||
mock_settings.gitea_repo = "owner/repo"
|
||||
|
||||
result = commit_report("# updated", "dashboard")
|
||||
|
||||
assert result is True
|
||||
payload = mock_client.put.call_args.kwargs.get("json", {})
|
||||
assert payload.get("sha") == "abc123"
|
||||
|
||||
def test_returns_false_on_http_error(self):
|
||||
import httpx
|
||||
|
||||
mock_check = MagicMock()
|
||||
mock_check.status_code = 404
|
||||
|
||||
mock_client = MagicMock()
|
||||
mock_client.__enter__ = MagicMock(return_value=mock_client)
|
||||
mock_client.__exit__ = MagicMock(return_value=False)
|
||||
mock_client.get.return_value = mock_check
|
||||
mock_client.put.side_effect = httpx.HTTPStatusError(
|
||||
"403", request=MagicMock(), response=MagicMock(status_code=403)
|
||||
)
|
||||
|
||||
with (
|
||||
patch("timmy.sovereignty.session_report.settings") as mock_settings,
|
||||
patch("timmy.sovereignty.session_report.httpx.Client", return_value=mock_client),
|
||||
):
|
||||
mock_settings.gitea_enabled = True
|
||||
mock_settings.gitea_token = "fake-token"
|
||||
mock_settings.gitea_url = "http://localhost:3000"
|
||||
mock_settings.gitea_repo = "owner/repo"
|
||||
|
||||
result = commit_report("# test", "dashboard")
|
||||
|
||||
assert result is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# generate_and_commit_report (async)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGenerateAndCommitReport:
|
||||
async def test_returns_true_on_success(self):
|
||||
with (
|
||||
patch(
|
||||
"timmy.sovereignty.session_report.generate_report",
|
||||
return_value="# mock report",
|
||||
),
|
||||
patch(
|
||||
"timmy.sovereignty.session_report.commit_report",
|
||||
return_value=True,
|
||||
),
|
||||
):
|
||||
result = await generate_and_commit_report("test")
|
||||
|
||||
assert result is True
|
||||
|
||||
async def test_returns_false_when_commit_fails(self):
|
||||
with (
|
||||
patch(
|
||||
"timmy.sovereignty.session_report.generate_report",
|
||||
return_value="# mock report",
|
||||
),
|
||||
patch(
|
||||
"timmy.sovereignty.session_report.commit_report",
|
||||
return_value=False,
|
||||
),
|
||||
):
|
||||
result = await generate_and_commit_report()
|
||||
|
||||
assert result is False
|
||||
|
||||
async def test_graceful_on_exception(self):
|
||||
with patch(
|
||||
"timmy.sovereignty.session_report.generate_report",
|
||||
side_effect=RuntimeError("explode"),
|
||||
):
|
||||
result = await generate_and_commit_report()
|
||||
|
||||
assert result is False
|
||||
Reference in New Issue
Block a user