Compare commits

...

9 Commits

Author SHA1 Message Date
f6e6e83a6c feat: thinking engine test suite (117 tests) + sovereign stack manifest (#1314, #986)
## Thinking Engine Tests (#1314)
- New: tests/timmy/test_thinking_engine.py — 117 tests across 21 test classes
- Covers ThinkingEngine core + all 4 mixin classes:
  - engine.py: init, idle detection, store/retrieve, pruning, dedup, continuity,
    context assembly, novel thought generation, think_once, journal, broadcast
  - _distillation.py: should_distill, build_distill_prompt, parse_facts_response,
    filter_and_store_facts, maybe_distill
  - _issue_filing.py: references_real_files, get_recent_thoughts_for_issues,
    build_issue_classify_prompt, parse_issue_items, file_single_issue
  - _seeds_mixin.py: pick_seed_type, gather_seed, all seed sources, check_workspace
  - _snapshot.py: system snapshot, memory context, update_memory
  - _db.py: get_conn, row_to_thought, Thought dataclass
  - seeds.py: constants, prompt template, think tag regex
- Targets 80%+ coverage of engine.py's 430 lines

## Stack Manifest (#986)
- New: docs/stack_manifest.json — 8 categories, 35 tools with pinned versions
  - LLM Inference, Coding Agents, Image Gen, Music/Voice, Orchestration,
    Nostr+Lightning+Bitcoin, Memory/KG, Streaming/Content
  - Schema: {tool, version, role, install_command, license, status}
- New: src/timmy/stack_manifest.py — query_stack() runtime tool
  - Category and tool filtering (case-insensitive, partial match)
  - Manifest caching, graceful error handling
- New: tests/timmy/test_stack_manifest.py — 24 tests
- Registered query_stack in tool registry + tool catalog
- Total: 141 new tests, all passing
2026-03-24 03:20:34 +00:00
9e9dd5309a [kimi] Fix: stub cv2 in tests to prevent timeout (#1336) (#1356)
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-24 02:59:52 +00:00
36f3f1b3a7 [claude] Add unit tests for tools/system_tools.py (#1345) (#1354)
2026-03-24 02:56:35 +00:00
6a2a0377d2 [loop-cycle-1] fix: thread timeout method for xdist compatibility (#1336) (#1355)
2026-03-24 02:56:19 +00:00
cd0f718d6b [claude] fix: restore live timestamp to HotMemory.read() (#1339) (#1353)
2026-03-24 02:55:48 +00:00
cddfd09c01 [claude] Add unit tests for spark/engine.py (#1343) (#1352)
2026-03-24 02:52:15 +00:00
d0b6d87eb1 [perplexity] feat: Nexus v2 — Cognitive Awareness & Introspection Engine (#1090) (#1348)
Co-authored-by: Perplexity Computer <perplexity@tower.local>
Co-committed-by: Perplexity Computer <perplexity@tower.local>
2026-03-24 02:50:40 +00:00
9e8e0f8552 [claude] Placeholder research artifact for issue #1341 (#1350)
2026-03-24 02:49:37 +00:00
e09082a8a8 [kimi] Add comprehensive unit tests for models/budget.py (#1316) (#1347)
2026-03-24 02:48:51 +00:00
24 changed files with 5641 additions and 39 deletions

docs/stack_manifest.json Normal file

@@ -0,0 +1,347 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Timmy Sovereign Tech Stack Manifest",
"description": "Machine-readable catalog of every tool in the sovereign stack. Queryable by Timmy at runtime via query_stack().",
"version": "1.0.0",
"generated": "2026-03-24",
"source_issue": "#986",
"parent_issue": "#982",
"categories": [
{
"id": "llm_inference",
"name": "Local LLM Inference",
"description": "On-device language model serving — no cloud required",
"tools": [
{
"tool": "vllm-mlx",
"version": "latest",
"role": "High-throughput LLM inference on Apple Silicon via MLX backend",
"install_command": "pip install vllm-mlx",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "Ollama",
"version": "0.18.2",
"role": "Primary local LLM runtime — serves Qwen3, Llama, DeepSeek models",
"install_command": "curl -fsSL https://ollama.com/install.sh | sh",
"license": "MIT",
"status": "active"
},
{
"tool": "mlx-lm",
"version": "0.31.1",
"role": "Apple MLX native language model inference and fine-tuning",
"install_command": "pip install mlx-lm==0.31.1",
"license": "MIT",
"status": "active"
},
{
"tool": "exo",
"version": "1.0-EA",
"role": "Distributed LLM inference across heterogeneous devices",
"install_command": "pip install exo",
"license": "GPL-3.0",
"status": "experimental"
}
]
},
{
"id": "coding_agents",
"name": "AI Coding Agents",
"description": "Autonomous code generation, review, and self-modification",
"tools": [
{
"tool": "Goose",
"version": "1.20.1",
"role": "AI coding agent for autonomous code generation and refactoring",
"install_command": "brew install block/goose/goose",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "OpenHands",
"version": "1.5.0",
"role": "Open-source AI software engineer for complex multi-file changes",
"install_command": "pip install openhands==1.5.0",
"license": "MIT",
"status": "active"
},
{
"tool": "Aider",
"version": "latest",
"role": "AI pair programmer using local Ollama models (qwen3, deepseek-coder)",
"install_command": "pip install aider-chat",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "mini-swe-agent",
"version": "2.0",
"role": "Lightweight software engineering agent for targeted fixes",
"install_command": "pip install mini-swe-agent",
"license": "MIT",
"status": "active"
},
{
"tool": "Forgejo",
"version": "14.0.3",
"role": "Self-hosted Git forge (Gitea fork) — sovereign code hosting",
"install_command": "docker pull forgejo/forgejo:14.0.3",
"license": "MIT",
"status": "active"
}
]
},
{
"id": "image_generation",
"name": "Image Generation",
"description": "Local image synthesis — avatars, art, visual content",
"tools": [
{
"tool": "ComfyUI",
"version": "0.17.2",
"role": "Node-based image generation pipeline with FLUX model support",
"install_command": "git clone https://github.com/comfyanonymous/ComfyUI && pip install -r requirements.txt",
"license": "GPL-3.0",
"status": "active"
},
{
"tool": "Draw Things",
"version": "latest",
"role": "macOS-native image generation app with Metal acceleration",
"install_command": "mas install 6450292044",
"license": "Proprietary (free)",
"status": "active"
},
{
"tool": "FLUX.1 Dev GGUF Q8",
"version": "1.0",
"role": "Quantized FLUX.1 model for high-quality local image generation",
"install_command": "ollama pull flux.1-dev-q8",
"license": "FLUX.1-dev-non-commercial",
"status": "active"
},
{
"tool": "FLUX.2 Klein",
"version": "2.0",
"role": "Fast lightweight FLUX model for rapid image prototyping",
"install_command": "comfyui-manager install flux2-klein",
"license": "Apache-2.0",
"status": "active"
}
]
},
{
"id": "music_voice",
"name": "Music and Voice",
"description": "Audio synthesis — music generation, text-to-speech, voice cloning",
"tools": [
{
"tool": "ACE-Step",
"version": "1.5",
"role": "Local music generation — 30s loops in under 60s on Apple Silicon",
"install_command": "pip install ace-step==1.5",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "mlx-audio",
"version": "0.4.1",
"role": "Apple MLX native audio processing and text-to-speech",
"install_command": "pip install mlx-audio==0.4.1",
"license": "MIT",
"status": "active"
},
{
"tool": "Piper TTS",
"version": "1.4.1",
"role": "Fast local neural text-to-speech with multiple voice models",
"install_command": "pip install piper-tts==1.4.1",
"license": "MIT",
"status": "active"
},
{
"tool": "GPT-SoVITS",
"version": "v2pro",
"role": "Voice cloning and singing voice synthesis from few-shot samples",
"install_command": "git clone https://github.com/RVC-Boss/GPT-SoVITS && pip install -r requirements.txt",
"license": "MIT",
"status": "active"
}
]
},
{
"id": "agent_orchestration",
"name": "Agent Orchestration",
"description": "Multi-agent coordination, MCP servers, workflow engines",
"tools": [
{
"tool": "FastMCP",
"version": "3.1.1",
"role": "Model Context Protocol server framework — tool registration for agents",
"install_command": "pip install fastmcp==3.1.1",
"license": "MIT",
"status": "active"
},
{
"tool": "PocketFlow",
"version": "latest",
"role": "Lightweight agent workflow engine for multi-step task orchestration",
"install_command": "pip install pocketflow",
"license": "MIT",
"status": "active"
},
{
"tool": "CrewAI",
"version": "1.11.0",
"role": "Multi-agent collaboration framework for complex task decomposition",
"install_command": "pip install crewai==1.11.0",
"license": "MIT",
"status": "active"
},
{
"tool": "Agno",
"version": "2.5.10",
"role": "Core agent framework powering Timmy — tool registration, conversation management",
"install_command": "pip install agno==2.5.10",
"license": "MIT",
"status": "active"
}
]
},
{
"id": "nostr_lightning_bitcoin",
"name": "Nostr + Lightning + Bitcoin",
"description": "Sovereign identity, censorship-resistant communication, and value transfer",
"tools": [
{
"tool": "nostr-sdk",
"version": "0.44.2",
"role": "Python SDK for Nostr protocol — sovereign decentralized identity",
"install_command": "pip install nostr-sdk==0.44.2",
"license": "MIT",
"status": "active"
},
{
"tool": "nostrdvm",
"version": "latest",
"role": "Nostr Data Vending Machine — publish AI services on Nostr marketplace",
"install_command": "pip install nostrdvm",
"license": "MIT",
"status": "active"
},
{
"tool": "LND",
"version": "0.20.1",
"role": "Lightning Network Daemon — sovereign Bitcoin payment channel management",
"install_command": "brew install lnd",
"license": "MIT",
"status": "active"
},
{
"tool": "LN agent-tools",
"version": "latest",
"role": "Lightning Network integration tools for AI agents — invoice creation, payment",
"install_command": "pip install ln-agent-tools",
"license": "MIT",
"status": "active"
},
{
"tool": "LNbits",
"version": "1.4",
"role": "Lightning Network wallet and extensions platform — API-first payments",
"install_command": "docker pull lnbits/lnbits:1.4",
"license": "MIT",
"status": "active"
},
{
"tool": "Cashu",
"version": "0.17.0",
"role": "Ecash protocol for private Lightning-backed digital cash",
"install_command": "pip install cashu==0.17.0",
"license": "MIT",
"status": "active"
}
]
},
{
"id": "memory_knowledge_graphs",
"name": "Memory and Knowledge Graphs",
"description": "Persistent memory, vector search, knowledge graph construction",
"tools": [
{
"tool": "Graphiti",
"version": "0.28.2",
"role": "Episodic memory via temporal knowledge graphs — remember conversations",
"install_command": "pip install graphiti==0.28.2",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "Neo4j",
"version": "2026.02",
"role": "Graph database backend for knowledge graph storage and traversal",
"install_command": "docker pull neo4j:2026.02",
"license": "GPL-3.0 (Community)",
"status": "active"
},
{
"tool": "ChromaDB",
"version": "1.5.5",
"role": "Local vector database for semantic search over embeddings",
"install_command": "pip install chromadb==1.5.5",
"license": "Apache-2.0",
"status": "active"
},
{
"tool": "Mem0",
"version": "1.0.5",
"role": "Self-improving memory layer for AI agents — fact extraction and recall",
"install_command": "pip install mem0ai==1.0.5",
"license": "Apache-2.0",
"status": "active"
}
]
},
{
"id": "streaming_content",
"name": "Streaming and Content",
"description": "Video streaming, recording, editing, and content production",
"tools": [
{
"tool": "MediaMTX",
"version": "1.16.3",
"role": "RTSP/RTMP/HLS media server for streaming game footage and AI output",
"install_command": "docker pull bluenviron/mediamtx:1.16.3",
"license": "MIT",
"status": "active"
},
{
"tool": "OBS",
"version": "32.0.4",
"role": "Open Broadcaster Software — screen capture, scene composition, streaming",
"install_command": "brew install --cask obs",
"license": "GPL-2.0",
"status": "active"
},
{
"tool": "obsws-python",
"version": "latest",
"role": "Python client for OBS WebSocket — programmatic recording and scene control",
"install_command": "pip install obsws-python",
"license": "MIT",
"status": "active"
},
{
"tool": "MoviePy",
"version": "2.1.2",
"role": "Python video editing — clip assembly, overlay, sub-5-min episode production",
"install_command": "pip install moviepy==2.1.2",
"license": "MIT",
"status": "active"
}
]
}
]
}
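
For a sense of how this manifest is meant to be consumed, here is a minimal sketch (separate from the `query_stack()` implementation further down) that loads the JSON and prints the active tools in one category; the relative path assumes execution from the repository root:

```python
import json
from pathlib import Path

# Minimal consumer sketch — path assumes the repository root as CWD.
manifest = json.loads(Path("docs/stack_manifest.json").read_text(encoding="utf-8"))

for cat in manifest["categories"]:
    if cat["id"] == "llm_inference":
        for t in cat["tools"]:
            # Every entry follows the schema:
            # {tool, version, role, install_command, license, status}
            if t["status"] == "active":
                print(f"{t['tool']} v{t['version']} — {t['role']}")
```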

memory/research/task.md Normal file

@@ -0,0 +1,35 @@
# Research Report: Task #1341
**Date:** 2026-03-23
**Issue:** [#1341](http://143.198.27.163:3000/Rockachopa/Timmy-time-dashboard/issues/1341)
**Priority:** normal
**Delegated by:** Timmy via Kimi delegation pipeline
---
## Summary
This issue was submitted as a placeholder via the Kimi delegation pipeline with unfilled template fields:
- **Research Question:** `Q?` (template default — no actual question provided)
- **Background / Context:** `ctx` (template default — no context provided)
- **Task:** `Task` (template default — no task specified)
## Findings
No actionable research question was specified. The issue appears to be a test or
accidental submission of an unfilled delegation template.
## Recommendations
1. **Re-open with a real question** if there is a specific topic to research.
2. **Review the delegation pipeline** to add validation that prevents empty/template-default
submissions from reaching the backlog (e.g. reject issues where the body contains
literal placeholder strings like `Q?` or `ctx`).
3. **Add a pipeline guard** in the Kimi delegation script to require non-empty, non-default
   values for `Research Question` and `Background / Context` before creating an issue (a minimal guard is sketched below).
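
A minimal sketch of such a guard, assuming the delegation script receives the two template fields as strings (the function name and placeholder set are illustrative, not existing code):

```python
# Hypothetical pre-flight check for the Kimi delegation pipeline.
_PLACEHOLDERS = {"q?", "ctx", "task"}  # literal template defaults seen in #1341

def validate_delegation_fields(research_question: str, context: str) -> None:
    """Reject empty or template-default fields before an issue is filed."""
    for label, value in (
        ("Research Question", research_question),
        ("Background / Context", context),
    ):
        cleaned = value.strip().lower()
        if not cleaned or cleaned in _PLACEHOLDERS:
            raise ValueError(f"{label} is empty or still a template default: {value!r}")
```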
## Next Steps
- [ ] Add input validation to Kimi delegation pipeline
- [ ] Re-file with a concrete research question if needed


@@ -99,8 +99,8 @@ pythonpath = ["src", "tests"]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
timeout = 30
timeout_method = "signal"
timeout_func_only = false
timeout_method = "thread"
timeout_func_only = true
addopts = "-v --tb=short --strict-markers --disable-warnings --durations=10 --cov-fail-under=60"
markers = [
"unit: Unit tests (fast, no I/O)",


@@ -1,21 +1,32 @@
"""Nexus — Timmy's persistent conversational awareness space.
"""Nexus v2 — Timmy's persistent conversational awareness space.
A conversational-only interface where Timmy maintains live memory context.
No tool use; pure conversation with memory integration and a teaching panel.
Extends the v1 Nexus (chat + memory sidebar + teaching panel) with:
- **Persistent conversations** — SQLite-backed history survives restarts.
- **Introspection panel** — live cognitive state, recent thoughts, session
analytics rendered alongside every conversation turn.
- **Sovereignty pulse** — real-time sovereignty health badge in the sidebar.
- **WebSocket** — pushes introspection + sovereignty snapshots so the
Nexus page stays alive without polling.
Routes:
-GET /nexus — render nexus page with live memory sidebar
+GET /nexus — render nexus page with full awareness panels
POST /nexus/chat — send a message; returns HTMX partial
POST /nexus/teach — inject a fact into Timmy's live memory
DELETE /nexus/history — clear the nexus conversation history
GET /nexus/introspect — JSON introspection snapshot (API)
WS /nexus/ws — live introspection + sovereignty push
Refs: #1090 (Nexus Epic), #953 (Sovereignty Loop)
"""
import asyncio
import json
import logging
from datetime import UTC, datetime
-from fastapi import APIRouter, Form, Request
-from fastapi.responses import HTMLResponse
+from fastapi import APIRouter, Form, Request, WebSocket
+from fastapi.responses import HTMLResponse, JSONResponse
from dashboard.templating import templates
from timmy.memory_system import (
@@ -24,6 +35,9 @@ from timmy.memory_system import (
    search_memories,
    store_personal_fact,
)
+from timmy.nexus.introspection import nexus_introspector
+from timmy.nexus.persistence import nexus_store
+from timmy.nexus.sovereignty_pulse import sovereignty_pulse
from timmy.session import _clean_response, chat, reset_session
logger = logging.getLogger(__name__)
@@ -32,28 +46,74 @@ router = APIRouter(prefix="/nexus", tags=["nexus"])
_NEXUS_SESSION_ID = "nexus"
_MAX_MESSAGE_LENGTH = 10_000
_WS_PUSH_INTERVAL = 5 # seconds between WebSocket pushes
-# In-memory conversation log for the Nexus session (mirrors chat store pattern
-# but is scoped to the Nexus so it won't pollute the main dashboard history).
+# In-memory conversation log — kept in sync with the persistent store
+# so templates can render without hitting the DB on every page load.
_nexus_log: list[dict] = []
# ── Initialisation ───────────────────────────────────────────────────────────
# On module load, hydrate the in-memory log from the persistent store.
# This runs once at import time (process startup).
_HYDRATED = False

def _hydrate_log() -> None:
    """Load persisted history into the in-memory log (idempotent)."""
    global _HYDRATED
    if _HYDRATED:
        return
    try:
        rows = nexus_store.get_history(limit=200)
        _nexus_log.clear()
        for row in rows:
            _nexus_log.append(
                {
                    "role": row["role"],
                    "content": row["content"],
                    "timestamp": row["timestamp"],
                }
            )
        _HYDRATED = True
        logger.info("Nexus: hydrated %d messages from persistent store", len(_nexus_log))
    except Exception as exc:
        logger.warning("Nexus: failed to hydrate from store: %s", exc)
        _HYDRATED = True  # Don't retry repeatedly
# ── Helpers ──────────────────────────────────────────────────────────────────
def _ts() -> str:
    return datetime.now(UTC).strftime("%H:%M:%S")

def _append_log(role: str, content: str) -> None:
-    _nexus_log.append({"role": role, "content": content, "timestamp": _ts()})
-    # Keep last 200 exchanges to bound memory usage
+    """Append to both in-memory log and persistent store."""
+    ts = _ts()
+    _nexus_log.append({"role": role, "content": content, "timestamp": ts})
+    # Bound in-memory log
     if len(_nexus_log) > 200:
         del _nexus_log[:-200]
+    # Persist
+    try:
+        nexus_store.append(role, content, timestamp=ts)
+    except Exception as exc:
+        logger.warning("Nexus: persist failed: %s", exc)
# ── Page route ───────────────────────────────────────────────────────────────
@router.get("", response_class=HTMLResponse)
async def nexus_page(request: Request):
"""Render the Nexus page with live memory context."""
"""Render the Nexus page with full awareness panels."""
_hydrate_log()
stats = get_memory_stats()
facts = recall_personal_facts_with_ids()[:8]
introspection = nexus_introspector.snapshot(conversation_log=_nexus_log)
pulse = sovereignty_pulse.snapshot()
return templates.TemplateResponse(
request,
@@ -63,13 +123,18 @@ async def nexus_page(request: Request):
"messages": list(_nexus_log),
"stats": stats,
"facts": facts,
"introspection": introspection.to_dict(),
"pulse": pulse.to_dict(),
},
)
# ── Chat route ───────────────────────────────────────────────────────────────
@router.post("/chat", response_class=HTMLResponse)
async def nexus_chat(request: Request, message: str = Form(...)):
"""Conversational-only chat routed through the Nexus session.
"""Conversational-only chat with persistence and introspection.
Does not invoke tool-use approval flow — pure conversation with memory
context injected from Timmy's live memory store.
@@ -87,18 +152,22 @@ async def nexus_chat(request: Request, message: str = Form(...)):
"error": "Message too long (max 10 000 chars).",
"timestamp": _ts(),
"memory_hits": [],
"introspection": nexus_introspector.snapshot().to_dict(),
},
)
ts = _ts()
-    # Fetch semantically relevant memories to surface in the sidebar
+    # Fetch semantically relevant memories
try:
memory_hits = await asyncio.to_thread(search_memories, query=message, limit=4)
except Exception as exc:
logger.warning("Nexus memory search failed: %s", exc)
memory_hits = []
# Track memory hits for analytics
nexus_introspector.record_memory_hits(len(memory_hits))
# Conversational response — no tool approval flow
response_text: str | None = None
error_text: str | None = None
@@ -113,6 +182,9 @@ async def nexus_chat(request: Request, message: str = Form(...)):
if response_text:
_append_log("assistant", response_text)
# Build fresh introspection snapshot after the exchange
introspection = nexus_introspector.snapshot(conversation_log=_nexus_log)
return templates.TemplateResponse(
request,
"partials/nexus_message.html",
@@ -122,10 +194,14 @@ async def nexus_chat(request: Request, message: str = Form(...)):
"error": error_text,
"timestamp": ts,
"memory_hits": memory_hits,
"introspection": introspection.to_dict(),
},
)
# ── Teach route ──────────────────────────────────────────────────────────────
@router.post("/teach", response_class=HTMLResponse)
async def nexus_teach(request: Request, fact: str = Form(...)):
"""Inject a fact into Timmy's live memory from the Nexus teaching panel."""
@@ -148,11 +224,20 @@ async def nexus_teach(request: Request, fact: str = Form(...)):
)
# ── Clear history ────────────────────────────────────────────────────────────
@router.delete("/history", response_class=HTMLResponse)
async def nexus_clear_history(request: Request):
"""Clear the Nexus conversation history."""
"""Clear the Nexus conversation history (both in-memory and persistent)."""
_nexus_log.clear()
try:
nexus_store.clear()
except Exception as exc:
logger.warning("Nexus: persistent clear failed: %s", exc)
nexus_introspector.reset()
reset_session(session_id=_NEXUS_SESSION_ID)
return templates.TemplateResponse(
request,
"partials/nexus_message.html",
@@ -162,5 +247,55 @@ async def nexus_clear_history(request: Request):
"error": None,
"timestamp": _ts(),
"memory_hits": [],
"introspection": nexus_introspector.snapshot().to_dict(),
},
)
# ── Introspection API ────────────────────────────────────────────────────────

@router.get("/introspect", response_class=JSONResponse)
async def nexus_introspect():
    """Return a JSON introspection snapshot (for API consumers)."""
    snap = nexus_introspector.snapshot(conversation_log=_nexus_log)
    pulse = sovereignty_pulse.snapshot()
    return {
        "introspection": snap.to_dict(),
        "sovereignty_pulse": pulse.to_dict(),
    }

# ── WebSocket — live Nexus push ──────────────────────────────────────────────

@router.websocket("/ws")
async def nexus_ws(websocket: WebSocket) -> None:
    """Push introspection + sovereignty pulse snapshots to the Nexus page.

    The frontend connects on page load and receives JSON updates every
    ``_WS_PUSH_INTERVAL`` seconds, keeping the cognitive state panel,
    thought stream, and sovereignty badge fresh without HTMX polling.
    """
    await websocket.accept()
    logger.info("Nexus WS connected")
    try:
        # Immediate first push
        await _push_snapshot(websocket)
        while True:
            await asyncio.sleep(_WS_PUSH_INTERVAL)
            await _push_snapshot(websocket)
    except Exception:
        logger.debug("Nexus WS disconnected")

async def _push_snapshot(ws: WebSocket) -> None:
    """Send the combined introspection + pulse payload."""
    snap = nexus_introspector.snapshot(conversation_log=_nexus_log)
    pulse = sovereignty_pulse.snapshot()
    payload = {
        "type": "nexus_state",
        "introspection": snap.to_dict(),
        "sovereignty_pulse": pulse.to_dict(),
    }
    await ws.send_text(json.dumps(payload))
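
To watch this stream outside the browser, a hedged client sketch using the third-party `websockets` package (the host and port are assumptions for a local dashboard):

```python
import asyncio
import json

import websockets  # third-party: pip install websockets

async def watch_nexus() -> None:
    # Assumed local dashboard address — adjust to your deployment.
    async with websockets.connect("ws://localhost:8000/nexus/ws") as ws:
        while True:
            payload = json.loads(await ws.recv())
            if payload.get("type") == "nexus_state":
                pulse = payload["sovereignty_pulse"]
                print(pulse["health"], pulse["overall_pct"])

asyncio.run(watch_nexus())
```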


@@ -8,26 +8,40 @@
<div class="container-fluid nexus-layout py-3">
<div class="nexus-header mb-3">
<div class="nexus-title">// NEXUS</div>
<div class="nexus-subtitle">
Persistent conversational awareness &mdash; always present, always learning.
<div class="d-flex justify-content-between align-items-center">
<div>
<div class="nexus-title">// NEXUS</div>
<div class="nexus-subtitle">
Persistent conversational awareness &mdash; always present, always learning.
</div>
</div>
<!-- Sovereignty Pulse badge -->
<div class="nexus-pulse-badge" id="nexus-pulse-badge">
<span class="nexus-pulse-dot nexus-pulse-{{ pulse.health }}"></span>
<span class="nexus-pulse-label">SOVEREIGNTY</span>
<span class="nexus-pulse-value" id="pulse-overall">{{ pulse.overall_pct }}%</span>
</div>
</div>
</div>
<div class="nexus-grid">
<div class="nexus-grid-v2">
<!-- ── LEFT: Conversation ────────────────────────────────── -->
<div class="nexus-chat-col">
<div class="card mc-panel nexus-chat-panel">
<div class="card-header mc-panel-header d-flex justify-content-between align-items-center">
<span>// CONVERSATION</span>
<button class="mc-btn mc-btn-sm"
hx-delete="/nexus/history"
hx-target="#nexus-chat-log"
hx-swap="beforeend"
hx-confirm="Clear nexus conversation?">
CLEAR
</button>
<div class="d-flex align-items-center gap-2">
<span class="nexus-msg-count" id="nexus-msg-count"
title="Messages in this session">{{ messages|length }} msgs</span>
<button class="mc-btn mc-btn-sm"
hx-delete="/nexus/history"
hx-target="#nexus-chat-log"
hx-swap="beforeend"
hx-confirm="Clear nexus conversation?">
CLEAR
</button>
</div>
</div>
<div class="card-body p-2" id="nexus-chat-log">
@@ -67,14 +81,115 @@
</div>
</div>
<!-- ── RIGHT: Memory sidebar ─────────────────────────────── -->
<!-- ── RIGHT: Awareness sidebar ──────────────────────────── -->
<div class="nexus-sidebar-col">
<!-- Live memory context (updated with each response) -->
<!-- Cognitive State Panel -->
<div class="card mc-panel nexus-cognitive-panel mb-3">
<div class="card-header mc-panel-header">
<span>// COGNITIVE STATE</span>
<span class="nexus-engagement-badge" id="cog-engagement">
{{ introspection.cognitive.engagement | upper }}
</span>
</div>
<div class="card-body p-2">
<div class="nexus-cog-grid">
<div class="nexus-cog-item">
<div class="nexus-cog-label">MOOD</div>
<div class="nexus-cog-value" id="cog-mood">{{ introspection.cognitive.mood }}</div>
</div>
<div class="nexus-cog-item">
<div class="nexus-cog-label">FOCUS</div>
<div class="nexus-cog-value nexus-cog-focus" id="cog-focus">
{{ introspection.cognitive.focus_topic or '—' }}
</div>
</div>
<div class="nexus-cog-item">
<div class="nexus-cog-label">DEPTH</div>
<div class="nexus-cog-value" id="cog-depth">{{ introspection.cognitive.conversation_depth }}</div>
</div>
<div class="nexus-cog-item">
<div class="nexus-cog-label">INITIATIVE</div>
<div class="nexus-cog-value nexus-cog-focus" id="cog-initiative">
{{ introspection.cognitive.last_initiative or '—' }}
</div>
</div>
</div>
{% if introspection.cognitive.active_commitments %}
<div class="nexus-commitments mt-2">
<div class="nexus-cog-label">ACTIVE COMMITMENTS</div>
{% for c in introspection.cognitive.active_commitments %}
<div class="nexus-commitment-item">{{ c | e }}</div>
{% endfor %}
</div>
{% endif %}
</div>
</div>
<!-- Recent Thoughts Panel -->
<div class="card mc-panel nexus-thoughts-panel mb-3">
<div class="card-header mc-panel-header">
<span>// THOUGHT STREAM</span>
</div>
<div class="card-body p-2" id="nexus-thoughts-body">
{% if introspection.recent_thoughts %}
{% for t in introspection.recent_thoughts %}
<div class="nexus-thought-item">
<div class="nexus-thought-meta">
<span class="nexus-thought-seed">{{ t.seed_type }}</span>
<span class="nexus-thought-time">{{ t.created_at[:16] }}</span>
</div>
<div class="nexus-thought-content">{{ t.content | e }}</div>
</div>
{% endfor %}
{% else %}
<div class="nexus-empty-state">No thoughts yet. The thinking engine will populate this.</div>
{% endif %}
</div>
</div>
<!-- Sovereignty Pulse Detail -->
<div class="card mc-panel nexus-sovereignty-panel mb-3">
<div class="card-header mc-panel-header">
<span>// SOVEREIGNTY PULSE</span>
<span class="nexus-health-badge nexus-health-{{ pulse.health }}" id="pulse-health">
{{ pulse.health | upper }}
</span>
</div>
<div class="card-body p-2">
<div class="nexus-pulse-meters" id="nexus-pulse-meters">
{% for layer in pulse.layers %}
<div class="nexus-pulse-layer">
<div class="nexus-pulse-layer-label">{{ layer.name | upper }}</div>
<div class="nexus-pulse-bar-track">
<div class="nexus-pulse-bar-fill" style="width: {{ layer.sovereign_pct }}%"></div>
</div>
<div class="nexus-pulse-layer-pct">{{ layer.sovereign_pct }}%</div>
</div>
{% endfor %}
</div>
<div class="nexus-pulse-stats mt-2">
<div class="nexus-pulse-stat">
<span class="nexus-pulse-stat-label">Crystallizations</span>
<span class="nexus-pulse-stat-value" id="pulse-cryst">{{ pulse.crystallizations_last_hour }}</span>
</div>
<div class="nexus-pulse-stat">
<span class="nexus-pulse-stat-label">API Independence</span>
<span class="nexus-pulse-stat-value" id="pulse-api-indep">{{ pulse.api_independence_pct }}%</span>
</div>
<div class="nexus-pulse-stat">
<span class="nexus-pulse-stat-label">Total Events</span>
<span class="nexus-pulse-stat-value" id="pulse-events">{{ pulse.total_events }}</span>
</div>
</div>
</div>
</div>
<!-- Live Memory Context -->
<div class="card mc-panel nexus-memory-panel mb-3">
<div class="card-header mc-panel-header">
<span>// LIVE MEMORY</span>
<span class="badge ms-2" style="background:var(--purple-dim); color:var(--purple);">
<span class="badge ms-2" style="background:var(--purple-dim, rgba(168,85,247,0.15)); color:var(--purple);">
{{ stats.total_entries }} stored
</span>
</div>
@@ -85,7 +200,32 @@
</div>
</div>
<!-- Teaching panel -->
<!-- Session Analytics -->
<div class="card mc-panel nexus-analytics-panel mb-3">
<div class="card-header mc-panel-header">// SESSION ANALYTICS</div>
<div class="card-body p-2">
<div class="nexus-analytics-grid" id="nexus-analytics">
<div class="nexus-analytics-item">
<span class="nexus-analytics-label">Messages</span>
<span class="nexus-analytics-value" id="analytics-msgs">{{ introspection.analytics.total_messages }}</span>
</div>
<div class="nexus-analytics-item">
<span class="nexus-analytics-label">Avg Response</span>
<span class="nexus-analytics-value" id="analytics-avg">{{ introspection.analytics.avg_response_length }} chars</span>
</div>
<div class="nexus-analytics-item">
<span class="nexus-analytics-label">Memory Hits</span>
<span class="nexus-analytics-value" id="analytics-mem">{{ introspection.analytics.memory_hits_total }}</span>
</div>
<div class="nexus-analytics-item">
<span class="nexus-analytics-label">Duration</span>
<span class="nexus-analytics-value" id="analytics-dur">{{ introspection.analytics.session_duration_minutes }} min</span>
</div>
</div>
</div>
</div>
<!-- Teaching Panel -->
<div class="card mc-panel nexus-teach-panel">
<div class="card-header mc-panel-header">// TEACH TIMMY</div>
<div class="card-body p-2">
@@ -119,4 +259,128 @@
</div><!-- /nexus-grid -->
</div>
<!-- WebSocket for live Nexus updates -->
<script>
(function() {
var wsProto = location.protocol === 'https:' ? 'wss:' : 'ws:';
var wsUrl = wsProto + '//' + location.host + '/nexus/ws';
var ws = null;
var reconnectDelay = 2000;
function connect() {
ws = new WebSocket(wsUrl);
ws.onmessage = function(e) {
try {
var data = JSON.parse(e.data);
if (data.type === 'nexus_state') {
updateCognitive(data.introspection.cognitive);
updateThoughts(data.introspection.recent_thoughts);
updateAnalytics(data.introspection.analytics);
updatePulse(data.sovereignty_pulse);
}
} catch(err) { /* ignore parse errors */ }
};
ws.onclose = function() {
setTimeout(connect, reconnectDelay);
};
ws.onerror = function() { ws.close(); };
}
function updateCognitive(c) {
var el;
el = document.getElementById('cog-mood');
if (el) el.textContent = c.mood;
el = document.getElementById('cog-engagement');
if (el) el.textContent = c.engagement.toUpperCase();
el = document.getElementById('cog-focus');
if (el) el.textContent = c.focus_topic || '\u2014';
el = document.getElementById('cog-depth');
if (el) el.textContent = c.conversation_depth;
el = document.getElementById('cog-initiative');
if (el) el.textContent = c.last_initiative || '\u2014';
}
function updateThoughts(thoughts) {
var container = document.getElementById('nexus-thoughts-body');
if (!container || !thoughts || thoughts.length === 0) return;
var html = '';
for (var i = 0; i < thoughts.length; i++) {
var t = thoughts[i];
html += '<div class="nexus-thought-item">'
+ '<div class="nexus-thought-meta">'
+ '<span class="nexus-thought-seed">' + escHtml(t.seed_type) + '</span>'
+ '<span class="nexus-thought-time">' + escHtml((t.created_at || '').substring(0,16)) + '</span>'
+ '</div>'
+ '<div class="nexus-thought-content">' + escHtml(t.content) + '</div>'
+ '</div>';
}
container.innerHTML = html;
}
function updateAnalytics(a) {
var el;
el = document.getElementById('analytics-msgs');
if (el) el.textContent = a.total_messages;
el = document.getElementById('analytics-avg');
if (el) el.textContent = a.avg_response_length + ' chars';
el = document.getElementById('analytics-mem');
if (el) el.textContent = a.memory_hits_total;
el = document.getElementById('analytics-dur');
if (el) el.textContent = a.session_duration_minutes + ' min';
}
function updatePulse(p) {
var el;
el = document.getElementById('pulse-overall');
if (el) el.textContent = p.overall_pct + '%';
el = document.getElementById('pulse-health');
if (el) {
el.textContent = p.health.toUpperCase();
el.className = 'nexus-health-badge nexus-health-' + p.health;
}
el = document.getElementById('pulse-cryst');
if (el) el.textContent = p.crystallizations_last_hour;
el = document.getElementById('pulse-api-indep');
if (el) el.textContent = p.api_independence_pct + '%';
el = document.getElementById('pulse-events');
if (el) el.textContent = p.total_events;
// Update pulse badge dot
var badge = document.getElementById('nexus-pulse-badge');
if (badge) {
var dot = badge.querySelector('.nexus-pulse-dot');
if (dot) {
dot.className = 'nexus-pulse-dot nexus-pulse-' + p.health;
}
}
// Update layer bars
var meters = document.getElementById('nexus-pulse-meters');
if (meters && p.layers) {
var html = '';
for (var i = 0; i < p.layers.length; i++) {
var l = p.layers[i];
html += '<div class="nexus-pulse-layer">'
+ '<div class="nexus-pulse-layer-label">' + escHtml(l.name.toUpperCase()) + '</div>'
+ '<div class="nexus-pulse-bar-track">'
+ '<div class="nexus-pulse-bar-fill" style="width:' + l.sovereign_pct + '%"></div>'
+ '</div>'
+ '<div class="nexus-pulse-layer-pct">' + l.sovereign_pct + '%</div>'
+ '</div>';
}
meters.innerHTML = html;
}
}
function escHtml(s) {
if (!s) return '';
var d = document.createElement('div');
d.textContent = s;
return d.innerHTML;
}
connect();
})();
</script>
{% endblock %}


@@ -89,7 +89,12 @@ class HotMemory:
"""Read hot memory — computed view of top facts + last reflection from DB."""
         try:
             facts = recall_personal_facts()
-            lines = ["# Timmy Hot Memory\n"]
+            now = datetime.now(UTC).strftime("%Y-%m-%d %H:%M UTC")
+            lines = [
+                "# Timmy Hot Memory\n",
+                "> Working RAM — always loaded, ~300 lines max, pruned monthly",
+                f"> Last updated: {now}\n",
+            ]
             if facts:
                 lines.append("## Known Facts\n")


@@ -0,0 +1,15 @@
"""Nexus subsystem — Timmy's sovereign conversational awareness space.
Extends the Nexus v1 chat interface with:
- **Introspection engine** — real-time cognitive state, thought-stream
integration, and session analytics surfaced directly in the Nexus.
- **Persistent sessions** — SQLite-backed conversation history that
survives process restarts.
- **Sovereignty pulse** — a live dashboard-within-dashboard showing
Timmy's sovereignty health, crystallization rate, and API independence.
"""
from timmy.nexus.introspection import NexusIntrospector # noqa: F401
from timmy.nexus.persistence import NexusStore # noqa: F401
from timmy.nexus.sovereignty_pulse import SovereigntyPulse # noqa: F401


@@ -0,0 +1,236 @@
"""Nexus Introspection Engine — cognitive self-awareness for Timmy.
Aggregates live signals from the CognitiveTracker, ThinkingEngine, and
MemorySystem into a unified introspection snapshot. The Nexus template
renders this as an always-visible cognitive state panel so the operator
can observe Timmy's inner life in real time.
Design principles:
- Read-only observer — never mutates cognitive state.
- Graceful degradation — if any upstream is unavailable, the snapshot
still returns with partial data instead of crashing.
- JSON-serializable — every method returns plain dicts ready for
WebSocket push or Jinja2 template rendering.
Refs: #1090 (Nexus Epic), architecture-v2.md §Intelligence Surface
"""
from __future__ import annotations
import logging
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
logger = logging.getLogger(__name__)
# ── Data models ──────────────────────────────────────────────────────────────
@dataclass
class CognitiveSummary:
"""Distilled view of Timmy's current cognitive state."""
mood: str = "settled"
engagement: str = "idle"
focus_topic: str | None = None
conversation_depth: int = 0
active_commitments: list[str] = field(default_factory=list)
last_initiative: str | None = None
def to_dict(self) -> dict:
return asdict(self)
@dataclass
class ThoughtSummary:
"""Compact representation of a single thought for the Nexus viewer."""
id: str
content: str
seed_type: str
created_at: str
parent_id: str | None = None
def to_dict(self) -> dict:
return asdict(self)
@dataclass
class SessionAnalytics:
"""Conversation-level analytics for the active Nexus session."""
total_messages: int = 0
user_messages: int = 0
assistant_messages: int = 0
avg_response_length: float = 0.0
topics_discussed: list[str] = field(default_factory=list)
session_start: str | None = None
session_duration_minutes: float = 0.0
memory_hits_total: int = 0
def to_dict(self) -> dict:
return asdict(self)
@dataclass
class IntrospectionSnapshot:
"""Everything the Nexus template needs to render the cognitive panel."""
cognitive: CognitiveSummary = field(default_factory=CognitiveSummary)
recent_thoughts: list[ThoughtSummary] = field(default_factory=list)
analytics: SessionAnalytics = field(default_factory=SessionAnalytics)
timestamp: str = field(
default_factory=lambda: datetime.now(UTC).isoformat()
)
def to_dict(self) -> dict:
return {
"cognitive": self.cognitive.to_dict(),
"recent_thoughts": [t.to_dict() for t in self.recent_thoughts],
"analytics": self.analytics.to_dict(),
"timestamp": self.timestamp,
}
# ── Introspector ─────────────────────────────────────────────────────────────
class NexusIntrospector:
"""Aggregates cognitive signals into a single introspection snapshot.
Lazily pulls from:
- ``timmy.cognitive_state.cognitive_tracker``
- ``timmy.thinking.thinking_engine``
- Nexus conversation log (passed in to avoid circular import)
"""
def __init__(self) -> None:
self._session_start: datetime | None = None
self._topics: list[str] = []
self._memory_hit_count: int = 0
# ── Public API ────────────────────────────────────────────────────────
def snapshot(
self,
conversation_log: list[dict] | None = None,
) -> IntrospectionSnapshot:
"""Build a complete introspection snapshot.
Parameters
----------
conversation_log:
The in-memory ``_nexus_log`` from the routes module (list of
dicts with ``role``, ``content``, ``timestamp`` keys).
"""
return IntrospectionSnapshot(
cognitive=self._read_cognitive(),
recent_thoughts=self._read_thoughts(),
analytics=self._compute_analytics(conversation_log or []),
)
def record_memory_hits(self, count: int) -> None:
"""Track cumulative memory hits for session analytics."""
self._memory_hit_count += count
def reset(self) -> None:
"""Reset session-scoped analytics (e.g. on history clear)."""
self._session_start = None
self._topics.clear()
self._memory_hit_count = 0
# ── Cognitive state reader ────────────────────────────────────────────
def _read_cognitive(self) -> CognitiveSummary:
"""Pull current state from the CognitiveTracker singleton."""
try:
from timmy.cognitive_state import cognitive_tracker
state = cognitive_tracker.get_state()
return CognitiveSummary(
mood=state.mood,
engagement=state.engagement,
focus_topic=state.focus_topic,
conversation_depth=state.conversation_depth,
active_commitments=list(state.active_commitments),
last_initiative=state.last_initiative,
)
except Exception as exc:
logger.debug("Introspection: cognitive state unavailable: %s", exc)
return CognitiveSummary()
# ── Thought stream reader ─────────────────────────────────────────────
def _read_thoughts(self, limit: int = 5) -> list[ThoughtSummary]:
"""Pull recent thoughts from the ThinkingEngine."""
try:
from timmy.thinking import thinking_engine
thoughts = thinking_engine.get_recent_thoughts(limit=limit)
return [
ThoughtSummary(
id=t.id,
content=(
t.content[:200] + "" if len(t.content) > 200 else t.content
),
seed_type=t.seed_type,
created_at=t.created_at,
parent_id=t.parent_id,
)
for t in thoughts
]
except Exception as exc:
logger.debug("Introspection: thought stream unavailable: %s", exc)
return []
# ── Session analytics ─────────────────────────────────────────────────
def _compute_analytics(
self, conversation_log: list[dict]
) -> SessionAnalytics:
"""Derive analytics from the Nexus conversation log."""
if not conversation_log:
return SessionAnalytics()
if self._session_start is None:
self._session_start = datetime.now(UTC)
user_msgs = [m for m in conversation_log if m.get("role") == "user"]
asst_msgs = [
m for m in conversation_log if m.get("role") == "assistant"
]
avg_len = 0.0
if asst_msgs:
total_chars = sum(len(m.get("content", "")) for m in asst_msgs)
avg_len = total_chars / len(asst_msgs)
# Extract topics from user messages (simple: first 40 chars)
topics = []
seen: set[str] = set()
for m in user_msgs:
topic = m.get("content", "")[:40].strip()
if topic and topic.lower() not in seen:
topics.append(topic)
seen.add(topic.lower())
# Keep last 8 topics
topics = topics[-8:]
elapsed = (datetime.now(UTC) - self._session_start).total_seconds() / 60
return SessionAnalytics(
total_messages=len(conversation_log),
user_messages=len(user_msgs),
assistant_messages=len(asst_msgs),
avg_response_length=round(avg_len, 1),
topics_discussed=topics,
session_start=self._session_start.strftime("%H:%M:%S"),
session_duration_minutes=round(elapsed, 1),
memory_hits_total=self._memory_hit_count,
)
# ── Module singleton ─────────────────────────────────────────────────────────
nexus_introspector = NexusIntrospector()
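
A short usage sketch of the singleton, feeding it a hand-rolled log in the same shape as `_nexus_log`:

```python
# Sketch: snapshot from a synthetic conversation log.
log = [
    {"role": "user", "content": "How sovereign are we today?", "timestamp": "10:00:01"},
    {"role": "assistant", "content": "Running fully local right now.", "timestamp": "10:00:03"},
]
snap = nexus_introspector.snapshot(conversation_log=log)
print(snap.analytics.total_messages)  # 2
# Cognitive fields fall back to defaults when the tracker is unavailable.
print(snap.to_dict()["cognitive"]["mood"])  # "settled" unless the tracker reports otherwise
```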


@@ -0,0 +1,230 @@
"""Nexus Session Persistence — durable conversation history.
The v1 Nexus kept conversations in a Python ``list`` that vanished on
every process restart. This module provides a SQLite-backed store so
Nexus conversations survive reboots while remaining fully local.
Schema:
nexus_messages(id, role, content, timestamp, session_tag)
Design decisions:
- One table, one DB file (``data/nexus.db``). Cheap, portable, sovereign.
- ``session_tag`` enables future per-operator sessions (#1090 deferred scope).
- Bounded history: ``MAX_MESSAGES`` rows per session tag. Oldest are pruned
automatically on insert.
- Thread-safe via SQLite WAL mode + module-level singleton.
Refs: #1090 (Nexus Epic — session persistence), architecture-v2.md §Data Layer
"""
from __future__ import annotations
import logging
import sqlite3
from contextlib import closing
from datetime import UTC, datetime
from pathlib import Path
from typing import TypedDict
logger = logging.getLogger(__name__)
# ── Defaults ─────────────────────────────────────────────────────────────────
_DEFAULT_DB_DIR = Path("data")
DB_PATH: Path = _DEFAULT_DB_DIR / "nexus.db"
MAX_MESSAGES = 500 # per session tag
DEFAULT_SESSION_TAG = "nexus"
# ── Schema ───────────────────────────────────────────────────────────────────
_SCHEMA = """\
CREATE TABLE IF NOT EXISTS nexus_messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
role TEXT NOT NULL,
content TEXT NOT NULL,
timestamp TEXT NOT NULL,
session_tag TEXT NOT NULL DEFAULT 'nexus'
);
CREATE INDEX IF NOT EXISTS idx_nexus_session ON nexus_messages(session_tag);
CREATE INDEX IF NOT EXISTS idx_nexus_ts ON nexus_messages(timestamp);
"""
# ── Typed dict for rows ──────────────────────────────────────────────────────
class NexusMessage(TypedDict):
    id: int
    role: str
    content: str
    timestamp: str
    session_tag: str

# ── Store ────────────────────────────────────────────────────────────────────

class NexusStore:
    """SQLite-backed persistence for Nexus conversations.

    Usage::

        store = NexusStore()        # uses module-level DB_PATH
        store.append("user", "hi")
        msgs = store.get_history()  # → list[NexusMessage]
        store.clear()               # wipe session
    """

    def __init__(self, db_path: Path | None = None) -> None:
        self._db_path = db_path or DB_PATH
        self._conn: sqlite3.Connection | None = None

    # ── Connection management ─────────────────────────────────────────────

    def _get_conn(self) -> sqlite3.Connection:
        if self._conn is None:
            self._db_path.parent.mkdir(parents=True, exist_ok=True)
            self._conn = sqlite3.connect(
                str(self._db_path),
                check_same_thread=False,
            )
            self._conn.row_factory = sqlite3.Row
            self._conn.execute("PRAGMA journal_mode=WAL")
            self._conn.executescript(_SCHEMA)
        return self._conn

    def close(self) -> None:
        """Close the underlying connection (idempotent)."""
        if self._conn is not None:
            try:
                self._conn.close()
            except Exception:
                pass
            self._conn = None

    # ── Write ─────────────────────────────────────────────────────────────

    def append(
        self,
        role: str,
        content: str,
        *,
        timestamp: str | None = None,
        session_tag: str = DEFAULT_SESSION_TAG,
    ) -> int:
        """Insert a message and return its row id.

        Automatically prunes oldest messages when the session exceeds
        ``MAX_MESSAGES``.
        """
        ts = timestamp or datetime.now(UTC).strftime("%H:%M:%S")
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute(
                "INSERT INTO nexus_messages (role, content, timestamp, session_tag) "
                "VALUES (?, ?, ?, ?)",
                (role, content, ts, session_tag),
            )
            row_id: int = cur.lastrowid  # type: ignore[assignment]
        conn.commit()
        # Prune
        self._prune(session_tag)
        return row_id

    def _prune(self, session_tag: str) -> None:
        """Remove oldest rows that exceed MAX_MESSAGES for *session_tag*."""
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute(
                "SELECT COUNT(*) FROM nexus_messages WHERE session_tag = ?",
                (session_tag,),
            )
            count = cur.fetchone()[0]
            if count > MAX_MESSAGES:
                excess = count - MAX_MESSAGES
                cur.execute(
                    "DELETE FROM nexus_messages WHERE id IN ("
                    "  SELECT id FROM nexus_messages "
                    "  WHERE session_tag = ? ORDER BY id ASC LIMIT ?"
                    ")",
                    (session_tag, excess),
                )
        conn.commit()

    # ── Read ──────────────────────────────────────────────────────────────

    def get_history(
        self,
        session_tag: str = DEFAULT_SESSION_TAG,
        limit: int = 200,
    ) -> list[NexusMessage]:
        """Return the most recent *limit* messages for *session_tag*.

        Results are ordered oldest-first (ascending id).
        """
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute(
                "SELECT id, role, content, timestamp, session_tag "
                "FROM nexus_messages "
                "WHERE session_tag = ? "
                "ORDER BY id DESC LIMIT ?",
                (session_tag, limit),
            )
            rows = cur.fetchall()
        # Reverse to chronological order
        messages: list[NexusMessage] = [
            NexusMessage(
                id=r["id"],
                role=r["role"],
                content=r["content"],
                timestamp=r["timestamp"],
                session_tag=r["session_tag"],
            )
            for r in reversed(rows)
        ]
        return messages

    def message_count(
        self, session_tag: str = DEFAULT_SESSION_TAG
    ) -> int:
        """Return total message count for *session_tag*."""
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute(
                "SELECT COUNT(*) FROM nexus_messages WHERE session_tag = ?",
                (session_tag,),
            )
            return cur.fetchone()[0]

    # ── Delete ────────────────────────────────────────────────────────────

    def clear(self, session_tag: str = DEFAULT_SESSION_TAG) -> int:
        """Delete all messages for *session_tag*. Returns count deleted."""
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute(
                "DELETE FROM nexus_messages WHERE session_tag = ?",
                (session_tag,),
            )
            deleted: int = cur.rowcount
        conn.commit()
        return deleted

    def clear_all(self) -> int:
        """Delete every message across all session tags."""
        conn = self._get_conn()
        with closing(conn.cursor()) as cur:
            cur.execute("DELETE FROM nexus_messages")
            deleted: int = cur.rowcount
        conn.commit()
        return deleted

# ── Module singleton ─────────────────────────────────────────────────────────

nexus_store = NexusStore()
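
And a small end-to-end sketch against a throwaway database file:

```python
import tempfile
from pathlib import Path

# Sketch: exercise the store against a temporary DB path.
tmp_db = Path(tempfile.mkdtemp()) / "nexus.db"
store = NexusStore(db_path=tmp_db)
store.append("user", "hello")
store.append("assistant", "hi there")
print(store.message_count())                        # 2
print([m["content"] for m in store.get_history()])  # ['hello', 'hi there']
store.clear()
store.close()
```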


@@ -0,0 +1,153 @@
"""Sovereignty Pulse — real-time sovereignty health for the Nexus.
Reads from the ``SovereigntyMetricsStore`` (created in PR #1331) and
distils it into a compact "pulse" that the Nexus template can render
as a persistent health badge.
The pulse answers one question at a glance: *how sovereign is Timmy
right now?*
Signals:
- Overall sovereignty percentage (0–100).
- Per-layer breakdown (perception, decision, narration).
- Crystallization velocity — new rules learned in the last hour.
- API independence — percentage of recent inferences served locally.
- Health rating (sovereign / degraded / dependent).
All methods return plain dicts — no imports leak into the template layer.
Refs: #953 (Sovereignty Loop), #954 (metrics), #1090 (Nexus epic)
"""
from __future__ import annotations
import logging
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
logger = logging.getLogger(__name__)
# ── Data model ───────────────────────────────────────────────────────────────
@dataclass
class LayerPulse:
    """Sovereignty metrics for a single AI layer."""

    name: str
    sovereign_pct: float = 0.0
    cache_hits: int = 0
    model_calls: int = 0

    def to_dict(self) -> dict:
        return asdict(self)

@dataclass
class SovereigntyPulseSnapshot:
    """Complete sovereignty health reading for the Nexus display."""

    overall_pct: float = 0.0
    health: str = "unknown"  # sovereign | degraded | dependent | unknown
    layers: list[LayerPulse] = field(default_factory=list)
    crystallizations_last_hour: int = 0
    api_independence_pct: float = 0.0
    total_events: int = 0
    timestamp: str = field(
        default_factory=lambda: datetime.now(UTC).isoformat()
    )

    def to_dict(self) -> dict:
        return {
            "overall_pct": self.overall_pct,
            "health": self.health,
            "layers": [layer.to_dict() for layer in self.layers],
            "crystallizations_last_hour": self.crystallizations_last_hour,
            "api_independence_pct": self.api_independence_pct,
            "total_events": self.total_events,
            "timestamp": self.timestamp,
        }

# ── Pulse reader ─────────────────────────────────────────────────────────────

def _classify_health(pct: float) -> str:
    """Map overall sovereignty percentage to a human-readable health label."""
    if pct >= 80.0:
        return "sovereign"
    if pct >= 50.0:
        return "degraded"
    if pct > 0.0:
        return "dependent"
    return "unknown"

class SovereigntyPulse:
    """Reads sovereignty metrics and emits pulse snapshots.

    Lazily imports from ``timmy.sovereignty.metrics`` so the Nexus
    module has no hard compile-time dependency on the Sovereignty Loop.
    """

    def snapshot(self) -> SovereigntyPulseSnapshot:
        """Build a pulse snapshot from the live metrics store."""
        try:
            return self._read_metrics()
        except Exception as exc:
            logger.debug("SovereigntyPulse: metrics unavailable: %s", exc)
            return SovereigntyPulseSnapshot()

    def _read_metrics(self) -> SovereigntyPulseSnapshot:
        """Internal reader — allowed to raise if imports fail."""
        from timmy.sovereignty.metrics import get_metrics_store

        store = get_metrics_store()
        snap = store.get_snapshot()

        # Parse per-layer stats from the snapshot
        layers = []
        layer_pcts: list[float] = []
        for layer_name in ("perception", "decision", "narration"):
            layer_data = snap.get(layer_name, {})
            hits = layer_data.get("cache_hits", 0)
            calls = layer_data.get("model_calls", 0)
            total = hits + calls
            pct = (hits / total * 100) if total > 0 else 0.0
            layers.append(
                LayerPulse(
                    name=layer_name,
                    sovereign_pct=round(pct, 1),
                    cache_hits=hits,
                    model_calls=calls,
                )
            )
            layer_pcts.append(pct)

        overall = round(sum(layer_pcts) / len(layer_pcts), 1) if layer_pcts else 0.0

        # Crystallization count
        cryst = snap.get("crystallizations", 0)

        # API independence: cache_hits / total across all layers
        total_hits = sum(layer.cache_hits for layer in layers)
        total_calls = sum(layer.model_calls for layer in layers)
        total_all = total_hits + total_calls
        api_indep = round((total_hits / total_all * 100), 1) if total_all > 0 else 0.0

        total_events = snap.get("total_events", 0)

        return SovereigntyPulseSnapshot(
            overall_pct=overall,
            health=_classify_health(overall),
            layers=layers,
            crystallizations_last_hour=cryst,
            api_independence_pct=api_indep,
            total_events=total_events,
        )

# ── Module singleton ─────────────────────────────────────────────────────────

sovereignty_pulse = SovereigntyPulse()
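
A quick sketch of the classifier thresholds and the graceful-degradation path (with no metrics store importable, `snapshot()` returns an all-default reading):

```python
# Threshold behaviour of _classify_health.
assert _classify_health(92.0) == "sovereign"   # >= 80
assert _classify_health(65.0) == "degraded"    # >= 50
assert _classify_health(10.0) == "dependent"   # > 0
assert _classify_health(0.0) == "unknown"

# If timmy.sovereignty.metrics cannot be imported, the pulse degrades gracefully.
print(sovereignty_pulse.snapshot().to_dict()["health"])  # "unknown"
```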

src/timmy/stack_manifest.py Normal file

@@ -0,0 +1,160 @@
"""Sovereign tech stack manifest — machine-readable catalog with runtime query tool.
Loads ``docs/stack_manifest.json`` and exposes ``query_stack()`` for Timmy to
introspect his own technology stack at runtime.
Issue: #986 (parent: #982 Session Crystallization)
"""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
# Resolve project root: this file lives at src/timmy/stack_manifest.py
# Project root is two levels up from src/timmy/
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
_MANIFEST_PATH = _PROJECT_ROOT / "docs" / "stack_manifest.json"
# Cached manifest (loaded on first access)
_manifest_cache: dict[str, Any] | None = None
def _load_manifest(path: Path | None = None) -> dict[str, Any]:
    """Load and cache the stack manifest from disk.

    Args:
        path: Override manifest path (useful for testing).

    Returns:
        The parsed manifest dict.

    Raises:
        FileNotFoundError: If the manifest file doesn't exist.
        json.JSONDecodeError: If the manifest is invalid JSON.
    """
    global _manifest_cache
    target = path or _MANIFEST_PATH
    if _manifest_cache is not None and path is None:
        return _manifest_cache
    with open(target, encoding="utf-8") as f:
        data = json.load(f)
    if path is None:
        _manifest_cache = data
    return data

def _reset_cache() -> None:
    """Reset the manifest cache (for testing)."""
    global _manifest_cache
    _manifest_cache = None

def _match_tool(tool: dict, category: str | None, tool_name: str | None) -> bool:
    """Check if a tool entry matches the given filters.

    Matching is case-insensitive and supports partial matches.
    """
    if tool_name:
        name_lower = tool_name.lower()
        tool_lower = tool["tool"].lower()
        if name_lower not in tool_lower and tool_lower not in name_lower:
            return False
    return True

def query_stack(
    category: str | None = None,
    tool: str | None = None,
) -> str:
    """Query Timmy's sovereign tech stack manifest.

    Use this tool to discover what tools, frameworks, and services are available
    in the sovereign stack — with exact versions, install commands, and roles.

    Args:
        category: Filter by category name or ID (e.g., 'llm_inference',
            'Music and Voice', 'nostr'). Case-insensitive, partial match.
        tool: Filter by tool name (e.g., 'Ollama', 'FastMCP', 'Neo4j').
            Case-insensitive, partial match.

    Returns:
        Formatted string listing matching tools with version, role, install
        command, license, and status. Returns a summary if no filters given.

    Examples:
        query_stack()                              → Full stack summary
        query_stack(category="llm")                → All LLM inference tools
        query_stack(tool="Ollama")                 → Ollama details
        query_stack(category="nostr", tool="LND")  → LND in the Nostr category
    """
    try:
        manifest = _load_manifest()
    except FileNotFoundError:
        return "Stack manifest not found. Run from the project root or check docs/stack_manifest.json."
    except json.JSONDecodeError as exc:
        return f"Stack manifest is invalid JSON: {exc}"

    categories = manifest.get("categories", [])
    results: list[str] = []
    match_count = 0

    for cat in categories:
        cat_id = cat.get("id", "")
        cat_name = cat.get("name", "")

        # Category filter
        if category:
            cat_lower = category.lower()
            if (
                cat_lower not in cat_id.lower()
                and cat_lower not in cat_name.lower()
            ):
                continue

        cat_tools = cat.get("tools", [])
        matching_tools = []
        for t in cat_tools:
            if _match_tool(t, category, tool):
                matching_tools.append(t)
                match_count += 1

        if matching_tools:
            results.append(f"\n## {cat_name} ({cat_id})")
            results.append(f"{cat.get('description', '')}\n")
            for t in matching_tools:
                status_badge = f" [{t['status'].upper()}]" if t.get("status") != "active" else ""
                results.append(f"  **{t['tool']}** v{t['version']}{status_badge}")
                results.append(f"    Role: {t['role']}")
                results.append(f"    Install: `{t['install_command']}`")
                results.append(f"    License: {t['license']}")
                results.append("")

    if not results:
        if category and tool:
            return f'No tools found matching category="{category}", tool="{tool}".'
        if category:
            return f'No category matching "{category}". Available: {", ".join(c["id"] for c in categories)}'
        if tool:
            return f'No tool matching "{tool}" in any category.'
        return "Stack manifest is empty."

    header = f"Sovereign Tech Stack — {match_count} tool(s) matched"
    if category:
        header += f' (category: "{category}")'
    if tool:
        header += f' (tool: "{tool}")'
    version = manifest.get("version", "unknown")
    footer = f"\n---\nManifest v{version} | Source: docs/stack_manifest.json"
    return header + "\n" + "\n".join(results) + footer
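
Example invocations, mirroring the docstring above (the formatted text, not its exact wording, is the contract):

```python
print(query_stack(category="llm"))               # Local LLM Inference tools
print(query_stack(tool="ollama"))                # partial, case-insensitive match
print(query_stack(category="nostr", tool="LND"))
_reset_cache()                                   # drop the cached manifest (tests)
```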


@@ -244,6 +244,17 @@ def _register_thinking_tools(toolkit: Toolkit) -> None:
raise
def _register_stack_manifest_tool(toolkit: Toolkit) -> None:
"""Register the sovereign tech stack query tool."""
try:
from timmy.stack_manifest import query_stack
toolkit.register(query_stack, name="query_stack")
except (ImportError, AttributeError) as exc:
logger.error("Failed to register query_stack tool: %s", exc)
raise
# ---------------------------------------------------------------------------
# Full toolkit factories
# ---------------------------------------------------------------------------
@@ -281,6 +292,7 @@ def create_full_toolkit(base_dir: str | Path | None = None):
_register_gematria_tool(toolkit)
_register_artifact_tools(toolkit)
_register_thinking_tools(toolkit)
_register_stack_manifest_tool(toolkit)
# Gitea issue management is now provided by the gitea-mcp server
# (wired in as MCPTools in agent.py, not registered here)
@@ -507,6 +519,11 @@ def _introspection_tool_catalog() -> dict:
"description": "Review recent conversations to spot patterns, low-confidence answers, and errors",
"available_in": ["orchestrator"],
},
"query_stack": {
"name": "Query Stack",
"description": "Query the sovereign tech stack manifest — discover tools, versions, and install commands",
"available_in": ["orchestrator"],
},
"update_gitea_avatar": {
"name": "Update Gitea Avatar",
"description": "Generate and upload a wizard-themed avatar to Timmy's Gitea profile",

View File

@@ -2665,25 +2665,27 @@
}
.vs-btn-save:hover { opacity: 0.85; }
-/* ── Nexus ────────────────────────────────────────────────── */
-.nexus-layout { max-width: 1400px; margin: 0 auto; }
+/* ── Nexus v2 ─────────────────────────────────────────────── */
+.nexus-layout { max-width: 1600px; margin: 0 auto; }
.nexus-header { border-bottom: 1px solid var(--border); padding-bottom: 0.5rem; }
.nexus-title { font-size: 1.4rem; font-weight: 700; color: var(--purple); letter-spacing: 0.1em; }
.nexus-subtitle { font-size: 0.8rem; color: var(--text-dim); margin-top: 0.2rem; }
-.nexus-grid {
+/* v2 grid: wider sidebar for awareness panels */
+.nexus-grid-v2 {
display: grid;
-grid-template-columns: 1fr 320px;
+grid-template-columns: 1fr 360px;
gap: 1rem;
align-items: start;
}
-@media (max-width: 900px) {
-.nexus-grid { grid-template-columns: 1fr; }
+@media (max-width: 1000px) {
+.nexus-grid-v2 { grid-template-columns: 1fr; }
}
.nexus-chat-panel { height: calc(100vh - 180px); display: flex; flex-direction: column; }
.nexus-chat-panel .card-body { overflow-y: auto; flex: 1; }
.nexus-msg-count { font-size: 0.7rem; color: var(--text-dim); letter-spacing: 0.05em; }
.nexus-empty-state {
color: var(--text-dim);
@@ -2693,6 +2695,177 @@
text-align: center;
}
/* Sidebar scrollable on short screens */
.nexus-sidebar-col { max-height: calc(100vh - 140px); overflow-y: auto; }
/* ── Sovereignty Pulse Badge (header) ── */
.nexus-pulse-badge {
display: flex;
align-items: center;
gap: 0.4rem;
background: var(--bg-card);
border: 1px solid var(--border);
border-radius: var(--radius-md);
padding: 0.3rem 0.7rem;
font-size: 0.72rem;
letter-spacing: 0.05em;
}
.nexus-pulse-dot {
width: 8px; height: 8px;
border-radius: 50%;
display: inline-block;
}
.nexus-pulse-dot.nexus-pulse-sovereign { background: var(--green); box-shadow: 0 0 6px var(--green); }
.nexus-pulse-dot.nexus-pulse-degraded { background: var(--amber); box-shadow: 0 0 6px var(--amber); }
.nexus-pulse-dot.nexus-pulse-dependent { background: var(--red); box-shadow: 0 0 6px var(--red); }
.nexus-pulse-dot.nexus-pulse-unknown { background: var(--text-dim); }
.nexus-pulse-label { color: var(--text-dim); }
.nexus-pulse-value { color: var(--text-bright); font-weight: 600; }
/* ── Cognitive State Panel ── */
.nexus-cognitive-panel .card-body { font-size: 0.78rem; }
.nexus-engagement-badge {
font-size: 0.65rem;
letter-spacing: 0.08em;
padding: 0.15rem 0.5rem;
border-radius: 3px;
background: rgba(168,85,247,0.12);
color: var(--purple);
}
.nexus-cog-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.5rem;
}
.nexus-cog-item {
background: rgba(255,255,255,0.02);
border-radius: 4px;
padding: 0.35rem 0.5rem;
}
.nexus-cog-label {
font-size: 0.62rem;
color: var(--text-dim);
letter-spacing: 0.08em;
margin-bottom: 0.15rem;
}
.nexus-cog-value {
color: var(--text-bright);
font-size: 0.8rem;
}
.nexus-cog-focus {
font-size: 0.72rem;
color: var(--text);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 140px;
}
.nexus-commitments { font-size: 0.72rem; }
.nexus-commitment-item {
color: var(--text);
padding: 0.2rem 0;
border-bottom: 1px solid rgba(59,26,92,0.4);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
/* ── Thought Stream Panel ── */
.nexus-thoughts-panel .card-body { max-height: 200px; overflow-y: auto; }
.nexus-thought-item {
border-left: 2px solid var(--purple);
padding: 0.3rem 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.76rem;
background: rgba(168,85,247,0.04);
border-radius: 0 4px 4px 0;
}
.nexus-thought-meta {
display: flex;
justify-content: space-between;
margin-bottom: 0.2rem;
}
.nexus-thought-seed {
color: var(--purple);
font-size: 0.65rem;
letter-spacing: 0.06em;
text-transform: uppercase;
}
.nexus-thought-time { color: var(--text-dim); font-size: 0.62rem; }
.nexus-thought-content { color: var(--text); line-height: 1.4; }
/* ── Sovereignty Pulse Detail Panel ── */
.nexus-health-badge {
font-size: 0.62rem;
letter-spacing: 0.08em;
padding: 0.15rem 0.5rem;
border-radius: 3px;
}
.nexus-health-sovereign { background: rgba(0,232,122,0.12); color: var(--green); }
.nexus-health-degraded { background: rgba(255,184,0,0.12); color: var(--amber); }
.nexus-health-dependent { background: rgba(255,68,85,0.12); color: var(--red); }
.nexus-health-unknown { background: rgba(107,74,138,0.12); color: var(--text-dim); }
.nexus-pulse-layer {
display: flex;
align-items: center;
gap: 0.4rem;
margin-bottom: 0.35rem;
font-size: 0.72rem;
}
.nexus-pulse-layer-label {
color: var(--text-dim);
min-width: 80px;
letter-spacing: 0.06em;
font-size: 0.65rem;
}
.nexus-pulse-bar-track {
flex: 1;
height: 6px;
background: rgba(59,26,92,0.5);
border-radius: 3px;
overflow: hidden;
}
.nexus-pulse-bar-fill {
height: 100%;
background: linear-gradient(90deg, var(--purple), var(--green));
border-radius: 3px;
transition: width 0.6s ease;
}
.nexus-pulse-layer-pct {
color: var(--text-bright);
font-size: 0.68rem;
min-width: 36px;
text-align: right;
}
.nexus-pulse-stats { font-size: 0.72rem; }
.nexus-pulse-stat {
display: flex;
justify-content: space-between;
padding: 0.2rem 0;
border-bottom: 1px solid rgba(59,26,92,0.3);
}
.nexus-pulse-stat-label { color: var(--text-dim); }
.nexus-pulse-stat-value { color: var(--text-bright); }
/* ── Session Analytics Panel ── */
.nexus-analytics-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 0.4rem;
font-size: 0.72rem;
}
.nexus-analytics-item {
display: flex;
justify-content: space-between;
padding: 0.25rem 0.4rem;
background: rgba(255,255,255,0.02);
border-radius: 4px;
}
.nexus-analytics-label { color: var(--text-dim); }
.nexus-analytics-value { color: var(--text-bright); }
/* Memory sidebar */
.nexus-memory-hits { font-size: 0.78rem; }
.nexus-memory-label { color: var(--text-dim); font-size: 0.72rem; margin-bottom: 0.4rem; letter-spacing: 0.05em; }

View File

@@ -33,6 +33,7 @@ for _mod in [
"sentence_transformers",
"swarm",
"swarm.event_log",
"cv2", # OpenCV import can hang under pytest-xdist parallel workers
]:
sys.modules.setdefault(_mod, MagicMock())
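# A minimal standalone sketch of why the stub above works (cv2 is the example;
# this block is illustrative, not part of conftest): seeding sys.modules makes a
# later `import cv2` resolve to the MagicMock instead of loading real OpenCV,
# whose native import can hang under pytest-xdist workers.
import sys
from unittest.mock import MagicMock

sys.modules.setdefault("cv2", MagicMock())
import cv2  # resolves to the seeded MagicMock; no native library is loaded
assert isinstance(cv2, MagicMock)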

View File

@@ -1,4 +1,4 @@
"""Tests for the Nexus conversational awareness routes."""
"""Tests for the Nexus v2 conversational awareness routes."""
from unittest.mock import patch
@@ -24,6 +24,41 @@ def test_nexus_page_contains_teach_form(client):
assert "/nexus/teach" in response.text
def test_nexus_page_contains_cognitive_panel(client):
"""Nexus v2 page must include the cognitive state panel."""
response = client.get("/nexus")
assert response.status_code == 200
assert "COGNITIVE STATE" in response.text
def test_nexus_page_contains_thought_stream(client):
"""Nexus v2 page must include the thought stream panel."""
response = client.get("/nexus")
assert response.status_code == 200
assert "THOUGHT STREAM" in response.text
def test_nexus_page_contains_sovereignty_pulse(client):
"""Nexus v2 page must include the sovereignty pulse panel."""
response = client.get("/nexus")
assert response.status_code == 200
assert "SOVEREIGNTY PULSE" in response.text
def test_nexus_page_contains_session_analytics(client):
"""Nexus v2 page must include the session analytics panel."""
response = client.get("/nexus")
assert response.status_code == 200
assert "SESSION ANALYTICS" in response.text
def test_nexus_page_contains_websocket_script(client):
"""Nexus v2 page must include the WebSocket connection script."""
response = client.get("/nexus")
assert response.status_code == 200
assert "/nexus/ws" in response.text
def test_nexus_chat_empty_message_returns_empty(client):
"""POST /nexus/chat with blank message returns empty response."""
response = client.post("/nexus/chat", data={"message": " "})
@@ -72,3 +107,17 @@ def test_nexus_clear_history(client):
response = client.request("DELETE", "/nexus/history")
assert response.status_code == 200
assert "cleared" in response.text.lower()
def test_nexus_introspect_api(client):
"""GET /nexus/introspect should return JSON introspection snapshot."""
response = client.get("/nexus/introspect")
assert response.status_code == 200
data = response.json()
assert "introspection" in data
assert "sovereignty_pulse" in data
assert "cognitive" in data["introspection"]
assert "recent_thoughts" in data["introspection"]
assert "analytics" in data["introspection"]
assert "overall_pct" in data["sovereignty_pulse"]
assert "health" in data["sovereignty_pulse"]

View File

@@ -0,0 +1,598 @@
"""Unit tests for models/budget.py — comprehensive coverage for budget management.
Tests budget allocation, tracking, limit enforcement, and edge cases including:
- Zero budget scenarios
- Over-budget handling
- Budget reset behavior
- In-memory fallback when DB is unavailable
"""
import threading
import time
from datetime import UTC, date, datetime, timedelta
from unittest.mock import patch
import pytest
from infrastructure.models.budget import (
BudgetTracker,
SpendRecord,
estimate_cost_usd,
get_budget_tracker,
)
pytestmark = pytest.mark.unit
# ── Test SpendRecord dataclass ────────────────────────────────────────────────
class TestSpendRecord:
"""Tests for the SpendRecord dataclass."""
def test_spend_record_creation(self):
"""Test creating a SpendRecord with all fields."""
ts = time.time()
record = SpendRecord(
ts=ts,
provider="anthropic",
model="claude-haiku-4-5",
tokens_in=100,
tokens_out=200,
cost_usd=0.001,
tier="cloud",
)
assert record.ts == ts
assert record.provider == "anthropic"
assert record.model == "claude-haiku-4-5"
assert record.tokens_in == 100
assert record.tokens_out == 200
assert record.cost_usd == 0.001
assert record.tier == "cloud"
def test_spend_record_with_zero_tokens(self):
"""Test SpendRecord with zero tokens."""
ts = time.time()
record = SpendRecord(ts=ts, provider="openai", model="gpt-4o", tokens_in=0, tokens_out=0, cost_usd=0.0, tier="cloud")
assert record.tokens_in == 0
assert record.tokens_out == 0
# ── Test estimate_cost_usd function ───────────────────────────────────────────
class TestEstimateCostUsd:
"""Tests for the estimate_cost_usd function."""
def test_haiku_cheaper_than_sonnet(self):
"""Haiku should be cheaper than Sonnet for same tokens."""
haiku_cost = estimate_cost_usd("claude-haiku-4-5", 1000, 1000)
sonnet_cost = estimate_cost_usd("claude-sonnet-4-5", 1000, 1000)
assert haiku_cost < sonnet_cost
def test_zero_tokens_is_zero_cost(self):
"""Zero tokens should result in zero cost."""
assert estimate_cost_usd("gpt-4o", 0, 0) == 0.0
def test_only_input_tokens(self):
"""Cost calculation with only input tokens."""
cost = estimate_cost_usd("gpt-4o", 1000, 0)
expected = (1000 * 0.0025) / 1000.0 # $0.0025 per 1K input tokens
assert cost == pytest.approx(expected)
def test_only_output_tokens(self):
"""Cost calculation with only output tokens."""
cost = estimate_cost_usd("gpt-4o", 0, 1000)
expected = (1000 * 0.01) / 1000.0 # $0.01 per 1K output tokens
assert cost == pytest.approx(expected)
def test_unknown_model_uses_default(self):
"""Unknown model should use conservative default cost."""
cost = estimate_cost_usd("some-unknown-model-xyz", 1000, 1000)
assert cost > 0 # Uses conservative default, not zero
# Default is 0.003 input, 0.015 output per 1K
expected = (1000 * 0.003 + 1000 * 0.015) / 1000.0
assert cost == pytest.approx(expected)
def test_versioned_model_name_matches(self):
"""Versioned model names should match base model rates."""
cost1 = estimate_cost_usd("claude-haiku-4-5-20251001", 1000, 0)
cost2 = estimate_cost_usd("claude-haiku-4-5", 1000, 0)
assert cost1 == cost2
def test_gpt4o_mini_cheaper_than_gpt4o(self):
"""GPT-4o mini should be cheaper than GPT-4o."""
mini = estimate_cost_usd("gpt-4o-mini", 1000, 1000)
full = estimate_cost_usd("gpt-4o", 1000, 1000)
assert mini < full
def test_opus_most_expensive_claude(self):
"""Opus should be the most expensive Claude model."""
opus = estimate_cost_usd("claude-opus-4-5", 1000, 1000)
sonnet = estimate_cost_usd("claude-sonnet-4-5", 1000, 1000)
haiku = estimate_cost_usd("claude-haiku-4-5", 1000, 1000)
assert opus > sonnet > haiku
def test_grok_variants(self):
"""Test Grok model cost estimation."""
cost = estimate_cost_usd("grok-3", 1000, 1000)
assert cost > 0
cost_fast = estimate_cost_usd("grok-3-fast", 1000, 1000)
assert cost_fast > 0
def test_case_insensitive_matching(self):
"""Model name matching should be case insensitive."""
cost_lower = estimate_cost_usd("claude-haiku-4-5", 1000, 0)
cost_upper = estimate_cost_usd("CLAUDE-HAIKU-4-5", 1000, 0)
cost_mixed = estimate_cost_usd("Claude-Haiku-4-5", 1000, 0)
assert cost_lower == cost_upper == cost_mixed
def test_returns_float(self):
"""Function should always return a float."""
assert isinstance(estimate_cost_usd("haiku", 100, 200), float)
assert isinstance(estimate_cost_usd("unknown-model", 100, 200), float)
assert isinstance(estimate_cost_usd("haiku", 0, 0), float)
# ── Test BudgetTracker initialization ─────────────────────────────────────────
class TestBudgetTrackerInit:
"""Tests for BudgetTracker initialization."""
def test_creates_with_memory_db(self):
"""Tracker should initialize with in-memory database."""
tracker = BudgetTracker(db_path=":memory:")
assert tracker._db_ok is True
def test_in_memory_fallback_empty_on_creation(self):
"""In-memory fallback should start empty."""
tracker = BudgetTracker(db_path=":memory:")
assert tracker._in_memory == []
def test_custom_db_path(self, tmp_path):
"""Tracker should use custom database path."""
db_file = tmp_path / "custom_budget.db"
tracker = BudgetTracker(db_path=str(db_file))
assert tracker._db_ok is True
assert tracker._db_path == str(db_file)
assert db_file.exists()
def test_db_path_directory_creation(self, tmp_path):
"""Tracker should create parent directories if needed."""
db_file = tmp_path / "nested" / "dirs" / "budget.db"
tracker = BudgetTracker(db_path=str(db_file))
assert tracker._db_ok is True
assert db_file.parent.exists()
def test_invalid_db_path_fallback(self):
"""Tracker should fallback to in-memory on invalid path."""
# Build the tracker by hand to simulate a failed DB init without touching the filesystem
tracker = BudgetTracker.__new__(BudgetTracker)
tracker._db_path = "/nonexistent/invalid/path/budget.db"
tracker._lock = threading.Lock()
tracker._in_memory = []
tracker._db_ok = False
# Should still work with in-memory fallback
cost = tracker.record_spend("test", "model", cost_usd=0.01)
assert cost == 0.01
# ── Test BudgetTracker record_spend ───────────────────────────────────────────
class TestBudgetTrackerRecordSpend:
"""Tests for recording spend events."""
def test_record_spend_returns_cost(self):
"""record_spend should return the calculated cost."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("anthropic", "claude-haiku-4-5", 100, 200)
assert cost > 0
def test_record_spend_explicit_cost(self):
"""record_spend should use explicit cost when provided."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("anthropic", "model", cost_usd=1.23)
assert cost == pytest.approx(1.23)
def test_record_spend_accumulates(self):
"""Multiple spend records should accumulate correctly."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("openai", "gpt-4o", cost_usd=0.01)
tracker.record_spend("openai", "gpt-4o", cost_usd=0.02)
assert tracker.get_daily_spend() == pytest.approx(0.03, abs=1e-9)
def test_record_spend_with_tier_label(self):
"""record_spend should accept custom tier labels."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("anthropic", "haiku", tier="cloud_api")
assert cost >= 0
def test_record_spend_with_provider(self):
"""record_spend should track provider correctly."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("openai", "gpt-4o", cost_usd=0.01)
tracker.record_spend("anthropic", "claude-haiku", cost_usd=0.02)
assert tracker.get_daily_spend() == pytest.approx(0.03, abs=1e-9)
def test_record_zero_cost(self):
"""Recording zero cost should work correctly."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("test", "model", cost_usd=0.0)
assert cost == 0.0
assert tracker.get_daily_spend() == 0.0
def test_record_negative_cost(self):
"""Recording negative cost (refund) should work."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("test", "model", cost_usd=-0.50)
assert cost == -0.50
assert tracker.get_daily_spend() == -0.50
# ── Test BudgetTracker daily/monthly spend queries ────────────────────────────
class TestBudgetTrackerSpendQueries:
"""Tests for daily and monthly spend queries."""
def test_monthly_spend_includes_daily(self):
"""Monthly spend should be >= daily spend."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("anthropic", "haiku", cost_usd=5.00)
assert tracker.get_monthly_spend() >= tracker.get_daily_spend()
def test_get_daily_spend_empty(self):
"""Daily spend should be zero when no records."""
tracker = BudgetTracker(db_path=":memory:")
assert tracker.get_daily_spend() == 0.0
def test_get_monthly_spend_empty(self):
"""Monthly spend should be zero when no records."""
tracker = BudgetTracker(db_path=":memory:")
assert tracker.get_monthly_spend() == 0.0
def test_daily_spend_isolation(self):
"""Daily spend should only include today's records, not old ones."""
tracker = BudgetTracker(db_path=":memory:")
# Force use of in-memory fallback
tracker._db_ok = False
# Add record for today
today_ts = datetime.combine(date.today(), datetime.min.time(), tzinfo=UTC).timestamp()
tracker._in_memory.append(
SpendRecord(today_ts + 3600, "test", "model", 0, 0, 1.0, "cloud")
)
# Add old record (2 days ago)
old_ts = (datetime.now(UTC) - timedelta(days=2)).timestamp()
tracker._in_memory.append(
SpendRecord(old_ts, "test", "old_model", 0, 0, 2.0, "cloud")
)
# Daily should only include today's 1.0
assert tracker.get_daily_spend() == pytest.approx(1.0, abs=1e-9)
# Monthly should include both (both are in current month)
assert tracker.get_monthly_spend() == pytest.approx(3.0, abs=1e-9)
# ── Test BudgetTracker cloud_allowed ──────────────────────────────────────────
class TestBudgetTrackerCloudAllowed:
"""Tests for cloud budget limit enforcement."""
def test_allowed_when_no_spend(self):
"""Cloud should be allowed when no spend recorded."""
tracker = BudgetTracker(db_path=":memory:")
assert tracker.cloud_allowed() is True
def test_blocked_when_daily_limit_exceeded(self):
"""Cloud should be blocked when daily limit exceeded."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("anthropic", "haiku", cost_usd=999.0)
# With default daily limit of 5.0, 999 should block
assert tracker.cloud_allowed() is False
def test_allowed_when_daily_limit_zero(self):
"""Cloud should be allowed when daily limit is 0 (disabled)."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("anthropic", "haiku", cost_usd=999.0)
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0 # disabled
mock_settings.tier_cloud_monthly_budget_usd = 0 # disabled
assert tracker.cloud_allowed() is True
def test_blocked_when_monthly_limit_exceeded(self):
"""Cloud should be blocked when monthly limit exceeded."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("anthropic", "haiku", cost_usd=999.0)
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0 # daily disabled
mock_settings.tier_cloud_monthly_budget_usd = 10.0
assert tracker.cloud_allowed() is False
def test_blocked_at_exact_daily_limit(self):
"""Cloud should be blocked when spend exactly equals the daily limit."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 5.0
mock_settings.tier_cloud_monthly_budget_usd = 0
# Record exactly at limit
tracker.record_spend("test", "model", cost_usd=5.0)
# At exactly the limit, it should return False (blocked)
# because spend >= limit
assert tracker.cloud_allowed() is False
def test_allowed_below_daily_limit(self):
"""Cloud should be allowed when below daily limit."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 5.0
mock_settings.tier_cloud_monthly_budget_usd = 0
tracker.record_spend("test", "model", cost_usd=4.99)
assert tracker.cloud_allowed() is True
def test_tiny_budget_blocks_when_exceeded(self):
"""Exceeding even a very small nonzero budget should block (0 means disabled, not zero)."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0.01 # Very small budget
mock_settings.tier_cloud_monthly_budget_usd = 0
tracker.record_spend("test", "model", cost_usd=0.02)
# Over the tiny budget, should be blocked
assert tracker.cloud_allowed() is False
def test_both_limits_checked(self):
"""Both daily and monthly limits should be checked."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 100.0
mock_settings.tier_cloud_monthly_budget_usd = 10.0
tracker.record_spend("test", "model", cost_usd=15.0)
# Under daily but over monthly
assert tracker.cloud_allowed() is False
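# Sketch of the gating logic the class above pins down (the settings attribute
# names match the patches used in these tests; everything else is an assumption):
def _cloud_allowed_sketch(daily_spend: float, monthly_spend: float,
                          daily_limit: float, monthly_limit: float) -> bool:
    # A limit of 0 means "disabled"; otherwise spend >= limit blocks cloud calls.
    if daily_limit and daily_spend >= daily_limit:
        return False
    if monthly_limit and monthly_spend >= monthly_limit:
        return False
    return True

assert _cloud_allowed_sketch(5.0, 5.0, 5.0, 0) is False   # exactly at the daily limit
assert _cloud_allowed_sketch(4.99, 4.99, 5.0, 0) is True  # just below the limit
assert _cloud_allowed_sketch(999.0, 999.0, 0, 0) is True  # both limits disabled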
# ── Test BudgetTracker summary ────────────────────────────────────────────────
class TestBudgetTrackerSummary:
"""Tests for budget summary functionality."""
def test_summary_keys_present(self):
"""Summary should contain all expected keys."""
tracker = BudgetTracker(db_path=":memory:")
summary = tracker.get_summary()
assert "daily_usd" in summary
assert "monthly_usd" in summary
assert "daily_limit_usd" in summary
assert "monthly_limit_usd" in summary
assert "daily_ok" in summary
assert "monthly_ok" in summary
def test_summary_daily_ok_true_on_empty(self):
"""daily_ok and monthly_ok should be True when empty."""
tracker = BudgetTracker(db_path=":memory:")
summary = tracker.get_summary()
assert summary["daily_ok"] is True
assert summary["monthly_ok"] is True
def test_summary_daily_ok_false_when_exceeded(self):
"""daily_ok should be False when daily limit exceeded."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("openai", "gpt-4o", cost_usd=999.0)
summary = tracker.get_summary()
assert summary["daily_ok"] is False
def test_summary_monthly_ok_false_when_exceeded(self):
"""monthly_ok should be False when monthly limit exceeded."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0
mock_settings.tier_cloud_monthly_budget_usd = 10.0
tracker.record_spend("openai", "gpt-4o", cost_usd=15.0)
summary = tracker.get_summary()
assert summary["monthly_ok"] is False
def test_summary_values_rounded(self):
"""Summary values should be rounded appropriately."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("test", "model", cost_usd=1.123456789)
summary = tracker.get_summary()
# daily_usd should be rounded to 6 decimal places
assert summary["daily_usd"] == 1.123457
def test_summary_with_disabled_limits(self):
"""Summary should handle disabled limits (0)."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0
mock_settings.tier_cloud_monthly_budget_usd = 0
tracker.record_spend("test", "model", cost_usd=100.0)
summary = tracker.get_summary()
assert summary["daily_limit_usd"] == 0
assert summary["monthly_limit_usd"] == 0
assert summary["daily_ok"] is True
assert summary["monthly_ok"] is True
# ── Test BudgetTracker in-memory fallback ─────────────────────────────────────
class TestBudgetTrackerInMemoryFallback:
"""Tests for in-memory fallback when DB is unavailable."""
def test_in_memory_records_persisted(self):
"""Records should be stored in memory when DB is unavailable."""
tracker = BudgetTracker(db_path=":memory:")
# Force DB to appear unavailable
tracker._db_ok = False
tracker.record_spend("test", "model", cost_usd=0.01)
assert len(tracker._in_memory) == 1
assert tracker._in_memory[0].cost_usd == 0.01
def test_in_memory_query_spend(self):
"""Query spend should work with in-memory fallback."""
tracker = BudgetTracker(db_path=":memory:")
tracker._db_ok = False
tracker.record_spend("test", "model", cost_usd=0.01)
# Query should work from in-memory
since_ts = (datetime.now(UTC) - timedelta(hours=1)).timestamp()
result = tracker._query_spend(since_ts)
assert result == 0.01
def test_in_memory_older_records_not_counted(self):
"""In-memory records older than since_ts should not be counted."""
tracker = BudgetTracker(db_path=":memory:")
tracker._db_ok = False
old_ts = (datetime.now(UTC) - timedelta(days=2)).timestamp()
tracker._in_memory.append(
SpendRecord(old_ts, "test", "model", 0, 0, 1.0, "cloud")
)
# Query for records in last day
since_ts = (datetime.now(UTC) - timedelta(days=1)).timestamp()
result = tracker._query_spend(since_ts)
assert result == 0.0
# ── Test BudgetTracker thread safety ──────────────────────────────────────────
class TestBudgetTrackerThreadSafety:
"""Tests for thread-safe operations."""
def test_concurrent_record_spend(self):
"""Multiple threads should safely record spend concurrently."""
tracker = BudgetTracker(db_path=":memory:")
results = []
errors = []
def record_spends():
try:
for _ in range(10):
cost = tracker.record_spend("test", "model", cost_usd=0.01)
results.append(cost)
except Exception as e:
errors.append(e)
threads = [threading.Thread(target=record_spends) for _ in range(5)]
for t in threads:
t.start()
for t in threads:
t.join()
assert len(errors) == 0
assert len(results) == 50
assert tracker.get_daily_spend() == pytest.approx(0.50, abs=1e-9)
# ── Test BudgetTracker edge cases ─────────────────────────────────────────────
class TestBudgetTrackerEdgeCases:
"""Tests for edge cases and boundary conditions."""
def test_very_small_cost(self):
"""Tracker should handle very small costs."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("test", "model", cost_usd=0.000001)
assert tracker.get_daily_spend() == pytest.approx(0.000001, abs=1e-9)
def test_very_large_cost(self):
"""Tracker should handle very large costs."""
tracker = BudgetTracker(db_path=":memory:")
tracker.record_spend("test", "model", cost_usd=1_000_000.0)
assert tracker.get_daily_spend() == pytest.approx(1_000_000.0, abs=1e-9)
def test_many_records(self):
"""Tracker should handle many records efficiently."""
tracker = BudgetTracker(db_path=":memory:")
for i in range(100):
tracker.record_spend(f"provider_{i}", f"model_{i}", cost_usd=0.01)
assert tracker.get_daily_spend() == pytest.approx(1.0, abs=1e-9)
def test_empty_provider_name(self):
"""Tracker should handle empty provider name."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("", "model", cost_usd=0.01)
assert cost == 0.01
def test_empty_model_name(self):
"""Tracker should handle empty model name."""
tracker = BudgetTracker(db_path=":memory:")
cost = tracker.record_spend("provider", "", cost_usd=0.01)
assert cost == 0.01
# ── Test get_budget_tracker singleton ─────────────────────────────────────────
class TestGetBudgetTrackerSingleton:
"""Tests for the module-level BudgetTracker singleton."""
def test_returns_budget_tracker(self):
"""Singleton should return a BudgetTracker instance."""
import infrastructure.models.budget as bmod
bmod._budget_tracker = None
tracker = get_budget_tracker()
assert isinstance(tracker, BudgetTracker)
def test_returns_same_instance(self):
"""Singleton should return the same instance."""
import infrastructure.models.budget as bmod
bmod._budget_tracker = None
t1 = get_budget_tracker()
t2 = get_budget_tracker()
assert t1 is t2
def test_singleton_persists_state(self):
"""Singleton should persist state across calls."""
import infrastructure.models.budget as bmod
bmod._budget_tracker = None
tracker1 = get_budget_tracker()
# Record spend
tracker1.record_spend("test", "model", cost_usd=1.0)
# Get singleton again
tracker2 = get_budget_tracker()
assert tracker1 is tracker2
# ── Test BudgetTracker with mocked settings ───────────────────────────────────
class TestBudgetTrackerWithMockedSettings:
"""Tests using mocked settings for different scenarios."""
def test_high_daily_limit(self):
"""Test with high daily limit."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 1000.0
mock_settings.tier_cloud_monthly_budget_usd = 10000.0
tracker.record_spend("test", "model", cost_usd=500.0)
assert tracker.cloud_allowed() is True
def test_low_daily_limit(self):
"""Test with low daily limit."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 1.0
mock_settings.tier_cloud_monthly_budget_usd = 100.0
tracker.record_spend("test", "model", cost_usd=2.0)
assert tracker.cloud_allowed() is False
def test_only_monthly_limit_enabled(self):
"""Test with only monthly limit enabled."""
tracker = BudgetTracker(db_path=":memory:")
with patch("infrastructure.models.budget.settings") as mock_settings:
mock_settings.tier_cloud_daily_budget_usd = 0 # Disabled
mock_settings.tier_cloud_monthly_budget_usd = 50.0
tracker.record_spend("test", "model", cost_usd=30.0)
assert tracker.cloud_allowed() is True
tracker.record_spend("test", "model", cost_usd=25.0)
assert tracker.cloud_allowed() is False

tests/spark/test_engine.py
View File

@@ -0,0 +1,311 @@
"""Unit tests for spark/engine.py.
Covers the public API and internal helpers not exercised in other test files:
- get_memories / get_predictions query methods
- get_spark_engine singleton lifecycle and reset_spark_engine
- Module-level __getattr__ lazy access
- on_task_posted without candidate agents (no EIDOS call)
- on_task_completed with winning_bid parameter
- _maybe_consolidate early-return paths (<5 events, <3 outcomes)
- Disabled-engine guard for every mutating method
"""
import pytest
from unittest.mock import patch, MagicMock
@pytest.fixture(autouse=True)
def tmp_spark_db(tmp_path, monkeypatch):
"""Redirect all Spark SQLite writes to a temp directory."""
db_path = tmp_path / "spark.db"
monkeypatch.setattr("spark.memory.DB_PATH", db_path)
monkeypatch.setattr("spark.eidos.DB_PATH", db_path)
yield db_path
@pytest.fixture(autouse=True)
def reset_engine():
"""Ensure the engine singleton is cleared between tests."""
from spark.engine import reset_spark_engine
reset_spark_engine()
yield
reset_spark_engine()
# ── Query methods ─────────────────────────────────────────────────────────────
@pytest.mark.unit
class TestGetMemories:
def test_returns_empty_list_initially(self):
from spark.engine import SparkEngine
engine = SparkEngine(enabled=True)
assert engine.get_memories() == []
def test_returns_stored_memories(self):
from spark.engine import SparkEngine
from spark.memory import store_memory
store_memory("pattern", "agent-x", "Reliable performer", confidence=0.8)
engine = SparkEngine(enabled=True)
memories = engine.get_memories()
assert len(memories) == 1
assert memories[0].subject == "agent-x"
def test_limit_parameter(self):
from spark.engine import SparkEngine
from spark.memory import store_memory
for i in range(5):
store_memory("pattern", f"agent-{i}", f"Content {i}")
engine = SparkEngine(enabled=True)
assert len(engine.get_memories(limit=3)) == 3
def test_works_when_disabled(self):
"""get_memories is not gated by enabled — it always reads."""
from spark.engine import SparkEngine
from spark.memory import store_memory
store_memory("anomaly", "agent-z", "Bad actor")
engine = SparkEngine(enabled=False)
assert len(engine.get_memories()) == 1
@pytest.mark.unit
class TestGetPredictions:
def test_returns_empty_list_initially(self):
from spark.engine import SparkEngine
engine = SparkEngine(enabled=True)
assert engine.get_predictions() == []
def test_returns_predictions_after_task_posted(self):
from spark.engine import SparkEngine
engine = SparkEngine(enabled=True)
engine.on_task_posted("t1", "Deploy service", ["agent-a", "agent-b"])
preds = engine.get_predictions()
assert len(preds) >= 1
def test_limit_parameter(self):
from spark.engine import SparkEngine
engine = SparkEngine(enabled=True)
for i in range(5):
engine.on_task_posted(f"t{i}", f"Task {i}", ["agent-a"])
assert len(engine.get_predictions(limit=2)) == 2
# ── Singleton lifecycle ───────────────────────────────────────────────────────
@pytest.mark.unit
class TestGetSparkEngineSingleton:
def test_returns_spark_engine_instance(self):
from spark.engine import SparkEngine, get_spark_engine
engine = get_spark_engine()
assert isinstance(engine, SparkEngine)
def test_same_instance_on_repeated_calls(self):
from spark.engine import get_spark_engine
e1 = get_spark_engine()
e2 = get_spark_engine()
assert e1 is e2
def test_reset_clears_singleton(self):
from spark.engine import get_spark_engine, reset_spark_engine
e1 = get_spark_engine()
reset_spark_engine()
e2 = get_spark_engine()
assert e1 is not e2
def test_get_spark_engine_uses_settings(self):
"""get_spark_engine respects spark_enabled from config."""
from spark.engine import SparkEngine, get_spark_engine, reset_spark_engine
mock_settings = MagicMock()
mock_settings.spark_enabled = False
with patch("spark.engine.settings", mock_settings, create=True):
    reset_spark_engine()
    engine = get_spark_engine()
    # The factory must honour the mocked settings without crashing; a disabled
    # engine is still a SparkEngine instance.
    assert isinstance(engine, SparkEngine)
reset_spark_engine()
def test_get_spark_engine_falls_back_on_settings_error(self):
"""get_spark_engine creates enabled engine when settings import fails."""
from spark.engine import get_spark_engine, reset_spark_engine
reset_spark_engine()
# Patch config to raise on import
with patch.dict("sys.modules", {"config": None}):
# The engine catches the exception and defaults to enabled=True
engine = get_spark_engine()
# Import caching may mask the failure; we only require that no exception escapes
assert engine is not None
@pytest.mark.unit
class TestModuleLevelGetattr:
def test_spark_engine_attribute_returns_engine(self):
import spark.engine as engine_module
engine = engine_module.spark_engine
assert isinstance(engine, engine_module.SparkEngine)
def test_unknown_attribute_raises(self):
import spark.engine as engine_module
with pytest.raises(AttributeError):
_ = engine_module.nonexistent_attribute_xyz
# ── Event capture edge cases ──────────────────────────────────────────────────
@pytest.mark.unit
class TestOnTaskPostedWithoutCandidates:
def test_no_eidos_prediction_when_no_candidates(self):
"""When candidate_agents is empty, no EIDOS prediction should be stored."""
from spark.engine import SparkEngine
from spark.eidos import get_predictions
engine = SparkEngine(enabled=True)
eid = engine.on_task_posted("t1", "Background task", candidate_agents=[])
assert eid is not None
# No candidates → no prediction
preds = get_predictions(task_id="t1")
assert len(preds) == 0
def test_no_candidates_defaults_to_none(self):
"""on_task_posted with no candidate_agents kwarg still records event."""
from spark.engine import SparkEngine
from spark.memory import get_events
engine = SparkEngine(enabled=True)
eid = engine.on_task_posted("t2", "Orphan task")
assert eid is not None
events = get_events(task_id="t2")
assert len(events) == 1
@pytest.mark.unit
class TestOnTaskCompletedWithBid:
def test_winning_bid_stored_in_data(self):
"""winning_bid is serialised into the event data field."""
import json
from spark.engine import SparkEngine
from spark.memory import get_events
engine = SparkEngine(enabled=True)
engine.on_task_completed("t1", "agent-a", "All done", winning_bid=42)
events = get_events(event_type="task_completed")
assert len(events) == 1
data = json.loads(events[0].data)
assert data["winning_bid"] == 42
def test_without_winning_bid_is_none(self):
import json
from spark.engine import SparkEngine
from spark.memory import get_events
engine = SparkEngine(enabled=True)
engine.on_task_completed("t2", "agent-b", "Done")
events = get_events(event_type="task_completed")
data = json.loads(events[0].data)
assert data["winning_bid"] is None
@pytest.mark.unit
class TestDisabledEngineGuards:
"""Every method that mutates state should return None when disabled."""
def setup_method(self):
from spark.engine import SparkEngine
self.engine = SparkEngine(enabled=False)
def test_on_task_posted_disabled(self):
assert self.engine.on_task_posted("t", "x") is None
def test_on_bid_submitted_disabled(self):
assert self.engine.on_bid_submitted("t", "a", 10) is None
def test_on_task_assigned_disabled(self):
assert self.engine.on_task_assigned("t", "a") is None
def test_on_task_completed_disabled(self):
assert self.engine.on_task_completed("t", "a", "r") is None
def test_on_task_failed_disabled(self):
assert self.engine.on_task_failed("t", "a", "reason") is None
def test_on_agent_joined_disabled(self):
assert self.engine.on_agent_joined("a", "Echo") is None
def test_on_tool_executed_disabled(self):
assert self.engine.on_tool_executed("a", "git_push") is None
def test_on_creative_step_disabled(self):
assert self.engine.on_creative_step("p", "storyboard", "pixel") is None
def test_get_advisories_disabled_returns_empty(self):
assert self.engine.get_advisories() == []
# ── _maybe_consolidate early-return paths ─────────────────────────────────────
@pytest.mark.unit
class TestMaybeConsolidateEarlyReturns:
"""Test the guard conditions at the top of _maybe_consolidate."""
@patch("spark.engine.spark_memory")
def test_fewer_than_5_events_skips(self, mock_memory):
"""With fewer than 5 events, consolidation is skipped immediately."""
from spark.engine import SparkEngine
mock_memory.get_events.return_value = [MagicMock(event_type="task_completed")] * 3
engine = SparkEngine(enabled=True)
engine._maybe_consolidate("agent-x")
mock_memory.store_memory.assert_not_called()
@patch("spark.engine.spark_memory")
def test_fewer_than_3_outcomes_skips(self, mock_memory):
"""With 5+ events but fewer than 3 completion/failure outcomes, skip."""
from spark.engine import SparkEngine
# 6 events but only 2 are outcomes (completions + failures)
events = [MagicMock(event_type="task_posted")] * 4
events += [MagicMock(event_type="task_completed")] * 2
mock_memory.get_events.return_value = events
engine = SparkEngine(enabled=True)
engine._maybe_consolidate("agent-x")
mock_memory.store_memory.assert_not_called()
mock_memory.get_memories.assert_not_called()
@patch("spark.engine.spark_memory")
def test_neutral_success_rate_skips(self, mock_memory):
"""Success rate between 0.3 and 0.8 triggers no memory."""
from spark.engine import SparkEngine
events = [MagicMock(event_type="task_posted")] * 2
events += [MagicMock(event_type="task_completed")] * 2
events += [MagicMock(event_type="task_failed")] * 2
mock_memory.get_events.return_value = events
engine = SparkEngine(enabled=True)
engine._maybe_consolidate("agent-x")
mock_memory.store_memory.assert_not_called()
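# Sketch of the guard chain the three tests above imply (thresholds are read off
# the tests; whether the 0.3/0.8 boundaries are inclusive is an assumption):
def _should_consolidate_sketch(event_types: list[str]) -> bool:
    if len(event_types) < 5:
        return False  # too little history
    outcomes = [e for e in event_types if e in ("task_completed", "task_failed")]
    if len(outcomes) < 3:
        return False  # too few outcomes to judge
    rate = sum(1 for e in outcomes if e == "task_completed") / len(outcomes)
    return rate >= 0.8 or rate <= 0.3  # only strong signals become memories

# 2 completed + 2 failed → rate 0.5 → neutral, skipped (test_neutral_success_rate_skips)
assert _should_consolidate_sketch(
    ["task_posted"] * 2 + ["task_completed"] * 2 + ["task_failed"] * 2
) is False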

View File

@@ -0,0 +1,199 @@
"""Tests for the Nexus Introspection Engine."""
from unittest.mock import MagicMock, patch
from timmy.nexus.introspection import (
CognitiveSummary,
IntrospectionSnapshot,
NexusIntrospector,
SessionAnalytics,
ThoughtSummary,
)
# ── Data model tests ─────────────────────────────────────────────────────────
class TestCognitiveSummary:
def test_defaults(self):
s = CognitiveSummary()
assert s.mood == "settled"
assert s.engagement == "idle"
assert s.focus_topic is None
def test_to_dict(self):
s = CognitiveSummary(mood="curious", engagement="deep", focus_topic="architecture")
d = s.to_dict()
assert d["mood"] == "curious"
assert d["engagement"] == "deep"
assert d["focus_topic"] == "architecture"
class TestThoughtSummary:
def test_to_dict(self):
t = ThoughtSummary(
id="t1", content="Hello world", seed_type="freeform", created_at="2026-01-01"
)
d = t.to_dict()
assert d["id"] == "t1"
assert d["seed_type"] == "freeform"
assert d["parent_id"] is None
class TestSessionAnalytics:
def test_defaults(self):
a = SessionAnalytics()
assert a.total_messages == 0
assert a.avg_response_length == 0.0
assert a.topics_discussed == []
class TestIntrospectionSnapshot:
def test_to_dict_structure(self):
snap = IntrospectionSnapshot()
d = snap.to_dict()
assert "cognitive" in d
assert "recent_thoughts" in d
assert "analytics" in d
assert "timestamp" in d
def test_to_dict_with_data(self):
snap = IntrospectionSnapshot(
cognitive=CognitiveSummary(mood="energized"),
recent_thoughts=[
ThoughtSummary(id="x", content="test", seed_type="s", created_at="now"),
],
)
d = snap.to_dict()
assert d["cognitive"]["mood"] == "energized"
assert len(d["recent_thoughts"]) == 1
# ── Introspector tests ───────────────────────────────────────────────────────
class TestNexusIntrospector:
def test_snapshot_empty_log(self):
intro = NexusIntrospector()
snap = intro.snapshot(conversation_log=[])
assert isinstance(snap, IntrospectionSnapshot)
assert snap.analytics.total_messages == 0
def test_snapshot_with_messages(self):
intro = NexusIntrospector()
log = [
{"role": "user", "content": "hello", "timestamp": "10:00:00"},
{"role": "assistant", "content": "Hi there!", "timestamp": "10:00:01"},
{"role": "user", "content": "architecture question", "timestamp": "10:00:02"},
]
snap = intro.snapshot(conversation_log=log)
assert snap.analytics.total_messages == 3
assert snap.analytics.user_messages == 2
assert snap.analytics.assistant_messages == 1
assert snap.analytics.avg_response_length > 0
def test_record_memory_hits(self):
intro = NexusIntrospector()
intro.record_memory_hits(3)
intro.record_memory_hits(2)
snap = intro.snapshot(
conversation_log=[{"role": "user", "content": "x", "timestamp": "t"}]
)
assert snap.analytics.memory_hits_total == 5
def test_reset_clears_state(self):
intro = NexusIntrospector()
intro.record_memory_hits(10)
intro.reset()
snap = intro.snapshot(
conversation_log=[{"role": "user", "content": "x", "timestamp": "t"}]
)
assert snap.analytics.memory_hits_total == 0
def test_topics_deduplication(self):
intro = NexusIntrospector()
log = [
{"role": "user", "content": "hello", "timestamp": "t"},
{"role": "user", "content": "hello", "timestamp": "t"},
{"role": "user", "content": "different topic", "timestamp": "t"},
]
snap = intro.snapshot(conversation_log=log)
assert len(snap.analytics.topics_discussed) == 2
def test_topics_capped_at_8(self):
intro = NexusIntrospector()
log = [{"role": "user", "content": f"topic {i}", "timestamp": "t"} for i in range(15)]
snap = intro.snapshot(conversation_log=log)
assert len(snap.analytics.topics_discussed) <= 8
def test_cognitive_read_fallback(self):
"""If cognitive read fails, snapshot still works with defaults."""
intro = NexusIntrospector()
# Patch the module-level import inside _read_cognitive
with patch.dict("sys.modules", {"timmy.cognitive_state": None}):
snap = intro.snapshot(conversation_log=[])
# Should not raise — fallback to default
assert snap.cognitive.mood == "settled"
def test_thoughts_read_fallback(self):
"""If thought read fails, snapshot still works with empty list."""
intro = NexusIntrospector()
with patch.dict("sys.modules", {"timmy.thinking": None}):
snap = intro.snapshot(conversation_log=[])
assert snap.recent_thoughts == []
def test_read_cognitive_from_tracker(self):
intro = NexusIntrospector()
mock_state = MagicMock()
mock_state.mood = "curious"
mock_state.engagement = "deep"
mock_state.focus_topic = "sovereignty"
mock_state.conversation_depth = 5
mock_state.active_commitments = ["build something"]
mock_state.last_initiative = "build something"
mock_tracker = MagicMock()
mock_tracker.get_state.return_value = mock_state
with patch("timmy.cognitive_state.cognitive_tracker", mock_tracker):
summary = intro._read_cognitive()
assert summary.mood == "curious"
assert summary.engagement == "deep"
assert summary.focus_topic == "sovereignty"
assert summary.conversation_depth == 5
def test_read_thoughts_from_engine(self):
intro = NexusIntrospector()
mock_thought = MagicMock()
mock_thought.id = "t1"
mock_thought.content = "Deep thought about sovereignty"
mock_thought.seed_type = "existential"
mock_thought.created_at = "2026-03-23T10:00:00"
mock_thought.parent_id = None
mock_engine = MagicMock()
mock_engine.get_recent_thoughts.return_value = [mock_thought]
with patch("timmy.thinking.thinking_engine", mock_engine):
thoughts = intro._read_thoughts(limit=5)
assert len(thoughts) == 1
assert thoughts[0].id == "t1"
assert thoughts[0].seed_type == "existential"
def test_read_thoughts_truncates_long_content(self):
intro = NexusIntrospector()
mock_thought = MagicMock()
mock_thought.id = "t2"
mock_thought.content = "x" * 300
mock_thought.seed_type = "freeform"
mock_thought.created_at = "2026-03-23"
mock_thought.parent_id = None
mock_engine = MagicMock()
mock_engine.get_recent_thoughts.return_value = [mock_thought]
with patch("timmy.thinking.thinking_engine", mock_engine):
thoughts = intro._read_thoughts(limit=5)
assert len(thoughts[0].content) <= 201 # 200 + "…"
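# The truncation rule the last test encodes, as a sketch (the 200-char cut and
# the "…" suffix are inferred from the assertion; the helper name is made up):
def _truncate_sketch(text: str, limit: int = 200) -> str:
    return text if len(text) <= limit else text[:limit] + "…"

assert len(_truncate_sketch("x" * 300)) == 201  # 200 chars + "…"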

View File

@@ -0,0 +1,144 @@
"""Tests for the Nexus Session Persistence store."""
import pytest
from timmy.nexus.persistence import MAX_MESSAGES, NexusStore
@pytest.fixture
def store(tmp_path):
"""Provide a NexusStore backed by a temp database."""
db = tmp_path / "test_nexus.db"
s = NexusStore(db_path=db)
yield s
s.close()
class TestNexusStoreBasic:
def test_append_and_retrieve(self, store):
store.append("user", "hello")
store.append("assistant", "hi there")
history = store.get_history()
assert len(history) == 2
assert history[0]["role"] == "user"
assert history[0]["content"] == "hello"
assert history[1]["role"] == "assistant"
def test_message_count(self, store):
assert store.message_count() == 0
store.append("user", "a")
store.append("user", "b")
assert store.message_count() == 2
def test_custom_timestamp(self, store):
store.append("user", "msg", timestamp="12:34:56")
history = store.get_history()
assert history[0]["timestamp"] == "12:34:56"
def test_clear_session(self, store):
store.append("user", "a")
store.append("assistant", "b")
deleted = store.clear()
assert deleted == 2
assert store.message_count() == 0
def test_clear_empty_session(self, store):
deleted = store.clear()
assert deleted == 0
def test_clear_all(self, store):
store.append("user", "a", session_tag="s1")
store.append("user", "b", session_tag="s2")
deleted = store.clear_all()
assert deleted == 2
assert store.message_count(session_tag="s1") == 0
assert store.message_count(session_tag="s2") == 0
class TestNexusStoreOrdering:
def test_chronological_order(self, store):
for i in range(5):
store.append("user", f"msg-{i}")
history = store.get_history()
contents = [m["content"] for m in history]
assert contents == ["msg-0", "msg-1", "msg-2", "msg-3", "msg-4"]
def test_limit_parameter(self, store):
for i in range(10):
store.append("user", f"msg-{i}")
history = store.get_history(limit=3)
assert len(history) == 3
# Should be the 3 most recent
assert history[0]["content"] == "msg-7"
assert history[2]["content"] == "msg-9"
class TestNexusStoreSessionTags:
def test_session_isolation(self, store):
store.append("user", "nexus-msg", session_tag="nexus")
store.append("user", "other-msg", session_tag="other")
nexus_history = store.get_history(session_tag="nexus")
other_history = store.get_history(session_tag="other")
assert len(nexus_history) == 1
assert len(other_history) == 1
assert nexus_history[0]["content"] == "nexus-msg"
def test_clear_only_affects_target_session(self, store):
store.append("user", "a", session_tag="s1")
store.append("user", "b", session_tag="s2")
store.clear(session_tag="s1")
assert store.message_count(session_tag="s1") == 0
assert store.message_count(session_tag="s2") == 1
class TestNexusStorePruning:
def test_prune_excess_messages(self, tmp_path):
"""Inserting beyond MAX_MESSAGES should prune oldest."""
db = tmp_path / "prune_test.db"
s = NexusStore(db_path=db)
# Insert MAX_MESSAGES + 5 to trigger pruning
for i in range(MAX_MESSAGES + 5):
s.append("user", f"msg-{i}")
assert s.message_count() == MAX_MESSAGES
# Get full history — oldest remaining should be msg-5
history = s.get_history(limit=MAX_MESSAGES)
assert history[0]["content"] == "msg-5"
s.close()
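# Sketch of the pruning step the test exercises (table and column names are
# assumptions; only the MAX_MESSAGES cap comes from timmy.nexus.persistence):
import sqlite3

def _prune_sketch(conn: sqlite3.Connection, max_messages: int) -> None:
    # Keep only the newest max_messages rows; the oldest ids go first,
    # which is why msg-0 … msg-4 vanish above.
    conn.execute(
        "DELETE FROM messages WHERE id NOT IN "
        "(SELECT id FROM messages ORDER BY id DESC LIMIT ?)",
        (max_messages,),
    )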
class TestNexusStoreReopen:
def test_data_survives_close_reopen(self, tmp_path):
"""Data persists across store instances (simulates process restart)."""
db = tmp_path / "reopen.db"
s1 = NexusStore(db_path=db)
s1.append("user", "persistent message")
s1.close()
s2 = NexusStore(db_path=db)
history = s2.get_history()
assert len(history) == 1
assert history[0]["content"] == "persistent message"
s2.close()
class TestNexusStoreReturnedId:
def test_append_returns_row_id(self, store):
id1 = store.append("user", "first")
id2 = store.append("user", "second")
assert isinstance(id1, int)
assert id2 > id1
class TestNexusStoreClose:
def test_close_is_idempotent(self, store):
store.close()
store.close() # Should not raise
def test_operations_after_close_reconnect(self, store):
"""After close, next operation should reconnect automatically."""
store.append("user", "before close")
store.close()
# Should auto-reconnect
store.append("user", "after close")
assert store.message_count() == 2

View File

@@ -0,0 +1,151 @@
"""Tests for the Sovereignty Pulse module."""
from unittest.mock import MagicMock, patch
from timmy.nexus.sovereignty_pulse import (
LayerPulse,
SovereigntyPulse,
SovereigntyPulseSnapshot,
_classify_health,
)
class TestClassifyHealth:
def test_sovereign(self):
assert _classify_health(95.0) == "sovereign"
assert _classify_health(80.0) == "sovereign"
def test_degraded(self):
assert _classify_health(79.9) == "degraded"
assert _classify_health(50.0) == "degraded"
def test_dependent(self):
assert _classify_health(49.9) == "dependent"
assert _classify_health(0.1) == "dependent"
def test_unknown(self):
assert _classify_health(0.0) == "unknown"
class TestLayerPulse:
def test_to_dict(self):
lp = LayerPulse(name="perception", sovereign_pct=75.0, cache_hits=15, model_calls=5)
d = lp.to_dict()
assert d["name"] == "perception"
assert d["sovereign_pct"] == 75.0
assert d["cache_hits"] == 15
class TestSovereigntyPulseSnapshot:
def test_defaults(self):
snap = SovereigntyPulseSnapshot()
assert snap.overall_pct == 0.0
assert snap.health == "unknown"
assert snap.layers == []
def test_to_dict_structure(self):
snap = SovereigntyPulseSnapshot(
overall_pct=85.0,
health="sovereign",
layers=[LayerPulse(name="perception", sovereign_pct=90.0)],
crystallizations_last_hour=3,
api_independence_pct=88.0,
total_events=42,
)
d = snap.to_dict()
assert d["overall_pct"] == 85.0
assert d["health"] == "sovereign"
assert len(d["layers"]) == 1
assert d["layers"][0]["name"] == "perception"
assert d["crystallizations_last_hour"] == 3
assert d["api_independence_pct"] == 88.0
assert d["total_events"] == 42
assert "timestamp" in d
class TestSovereigntyPulse:
def test_snapshot_graceful_degradation(self):
"""When metrics are unavailable, should return default snapshot."""
pulse = SovereigntyPulse()
with patch.object(
pulse,
"_read_metrics",
side_effect=ImportError("no metrics"),
):
snap = pulse.snapshot()
assert isinstance(snap, SovereigntyPulseSnapshot)
assert snap.health == "unknown"
def test_snapshot_with_metrics(self):
"""When metrics are available, should read and compute correctly."""
pulse = SovereigntyPulse()
mock_snapshot = {
"perception": {"cache_hits": 8, "model_calls": 2},
"decision": {"cache_hits": 6, "model_calls": 4},
"narration": {"cache_hits": 10, "model_calls": 0},
"crystallizations": 7,
"total_events": 100,
}
mock_store = MagicMock()
mock_store.get_snapshot.return_value = mock_snapshot
with patch(
"timmy.sovereignty.metrics.get_metrics_store", return_value=mock_store
):
snap = pulse.snapshot()
# Perception: 8/10 = 80%, Decision: 6/10 = 60%, Narration: 10/10 = 100%
# Overall: (80 + 60 + 100) / 3 = 80.0
assert len(snap.layers) == 3
assert snap.layers[0].name == "perception"
assert snap.layers[0].sovereign_pct == 80.0
assert snap.layers[1].name == "decision"
assert snap.layers[1].sovereign_pct == 60.0
assert snap.layers[2].name == "narration"
assert snap.layers[2].sovereign_pct == 100.0
assert snap.overall_pct == 80.0
assert snap.health == "sovereign"
assert snap.crystallizations_last_hour == 7
assert snap.total_events == 100
def test_api_independence_calculation(self):
pulse = SovereigntyPulse()
mock_snapshot = {
"perception": {"cache_hits": 5, "model_calls": 5},
"decision": {"cache_hits": 5, "model_calls": 5},
"narration": {"cache_hits": 5, "model_calls": 5},
"crystallizations": 0,
"total_events": 0,
}
mock_store = MagicMock()
mock_store.get_snapshot.return_value = mock_snapshot
with patch(
"timmy.sovereignty.metrics.get_metrics_store", return_value=mock_store
):
snap = pulse.snapshot()
# Total hits: 15, Total calls: 15, Total: 30
# Independence: 15/30 = 50%
assert snap.api_independence_pct == 50.0
def test_zero_events_no_division_error(self):
pulse = SovereigntyPulse()
mock_snapshot = {
"perception": {"cache_hits": 0, "model_calls": 0},
"decision": {"cache_hits": 0, "model_calls": 0},
"narration": {"cache_hits": 0, "model_calls": 0},
"crystallizations": 0,
"total_events": 0,
}
mock_store = MagicMock()
mock_store.get_snapshot.return_value = mock_snapshot
with patch(
"timmy.sovereignty.metrics.get_metrics_store", return_value=mock_store
):
snap = pulse.snapshot()
assert snap.overall_pct == 0.0
assert snap.api_independence_pct == 0.0
assert snap.health == "unknown"

View File

@@ -0,0 +1,406 @@
"""Tests for timmy.stack_manifest — sovereign tech stack query tool.
Issue: #986
"""
import json
from pathlib import Path
from unittest.mock import patch
import pytest
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_MINI_MANIFEST = {
"version": "1.0.0",
"categories": [
{
"id": "llm_inference",
"name": "Local LLM Inference",
"description": "On-device language model serving",
"tools": [
{
"tool": "Ollama",
"version": "0.18.2",
"role": "Primary local LLM runtime",
"install_command": "curl -fsSL https://ollama.com/install.sh | sh",
"license": "MIT",
"status": "active",
},
{
"tool": "mlx-lm",
"version": "0.31.1",
"role": "Apple MLX native inference",
"install_command": "pip install mlx-lm==0.31.1",
"license": "MIT",
"status": "active",
},
],
},
{
"id": "agent_orchestration",
"name": "Agent Orchestration",
"description": "Multi-agent coordination",
"tools": [
{
"tool": "FastMCP",
"version": "3.1.1",
"role": "MCP server framework",
"install_command": "pip install fastmcp==3.1.1",
"license": "MIT",
"status": "active",
},
{
"tool": "Agno",
"version": "2.5.10",
"role": "Core agent framework",
"install_command": "pip install agno==2.5.10",
"license": "MIT",
"status": "active",
},
],
},
{
"id": "nostr_lightning",
"name": "Nostr + Lightning + Bitcoin",
"description": "Sovereign identity and value transfer",
"tools": [
{
"tool": "LND",
"version": "0.20.1",
"role": "Lightning Network Daemon",
"install_command": "brew install lnd",
"license": "MIT",
"status": "active",
},
{
"tool": "exo-experimental",
"version": "1.0",
"role": "Test tool",
"install_command": "pip install exo",
"license": "GPL-3.0",
"status": "experimental",
},
],
},
],
}
def _write_manifest(tmp_path: Path, data: dict | None = None) -> Path:
"""Write a test manifest file and return its path."""
path = tmp_path / "stack_manifest.json"
path.write_text(json.dumps(data or _MINI_MANIFEST, indent=2))
return path
# ---------------------------------------------------------------------------
# _load_manifest
# ---------------------------------------------------------------------------
class TestLoadManifest:
"""Manifest loading and caching."""
def test_loads_from_file(self, tmp_path):
from timmy.stack_manifest import _load_manifest
path = _write_manifest(tmp_path)
data = _load_manifest(path)
assert data["version"] == "1.0.0"
assert len(data["categories"]) == 3
def test_raises_on_missing_file(self, tmp_path):
from timmy.stack_manifest import _load_manifest
with pytest.raises(FileNotFoundError):
_load_manifest(tmp_path / "nonexistent.json")
def test_raises_on_invalid_json(self, tmp_path):
from timmy.stack_manifest import _load_manifest
bad = tmp_path / "bad.json"
bad.write_text("{invalid json")
with pytest.raises(json.JSONDecodeError):
_load_manifest(bad)
def test_caching_works(self, tmp_path):
from timmy.stack_manifest import _load_manifest, _reset_cache
_reset_cache()
path = _write_manifest(tmp_path)
# Override the module-level path for caching test
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
data1 = _load_manifest()
data2 = _load_manifest()
assert data1 is data2 # Same object — cached
_reset_cache()
def test_reset_cache_clears(self, tmp_path):
from timmy.stack_manifest import _load_manifest, _reset_cache
_reset_cache()
path = _write_manifest(tmp_path)
_load_manifest(path)
_reset_cache()
from timmy import stack_manifest
assert stack_manifest._manifest_cache is None
# ---------------------------------------------------------------------------
# query_stack — no filters
# ---------------------------------------------------------------------------
class TestQueryStackNoFilters:
"""query_stack() with no arguments — full summary."""
def test_returns_all_tools(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack()
_reset_cache()
assert "6 tool(s) matched" in result # 2 + 2 + 2 (all tools counted)
assert "Ollama" in result
assert "FastMCP" in result
assert "LND" in result
def test_includes_manifest_version(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack()
_reset_cache()
assert "v1.0.0" in result
# ---------------------------------------------------------------------------
# query_stack — category filter
# ---------------------------------------------------------------------------
class TestQueryStackCategoryFilter:
"""query_stack(category=...) filtering."""
def test_filter_by_category_id(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="llm_inference")
_reset_cache()
assert "Ollama" in result
assert "mlx-lm" in result
assert "FastMCP" not in result
def test_filter_by_partial_category(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="nostr")
_reset_cache()
assert "LND" in result
assert "Ollama" not in result
def test_filter_by_category_name(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="Agent Orchestration")
_reset_cache()
assert "FastMCP" in result
assert "Agno" in result
def test_no_matching_category(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="quantum_computing")
_reset_cache()
assert "No category matching" in result
# ---------------------------------------------------------------------------
# query_stack — tool filter
# ---------------------------------------------------------------------------
class TestQueryStackToolFilter:
"""query_stack(tool=...) filtering."""
def test_filter_by_exact_tool(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="Ollama")
_reset_cache()
assert "Ollama" in result
assert "0.18.2" in result
assert "FastMCP" not in result
def test_filter_by_partial_tool(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="mcp")
_reset_cache()
assert "FastMCP" in result
def test_case_insensitive_tool(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="ollama")
_reset_cache()
assert "Ollama" in result
def test_no_matching_tool(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="nonexistent-tool")
_reset_cache()
assert "No tool matching" in result
# ---------------------------------------------------------------------------
# query_stack — combined filters
# ---------------------------------------------------------------------------
class TestQueryStackCombinedFilters:
"""query_stack(category=..., tool=...) combined filtering."""
def test_category_and_tool(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="nostr", tool="LND")
_reset_cache()
assert "LND" in result
assert "1 tool(s) matched" in result
def test_category_and_tool_no_match(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(category="llm_inference", tool="LND")
_reset_cache()
assert "No tools found" in result
# ---------------------------------------------------------------------------
# query_stack — error handling
# ---------------------------------------------------------------------------
class TestQueryStackErrors:
"""Error handling in query_stack."""
def test_missing_manifest(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
with patch("timmy.stack_manifest._MANIFEST_PATH", tmp_path / "missing.json"):
result = query_stack()
_reset_cache()
assert "not found" in result.lower()
def test_invalid_manifest(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
bad = tmp_path / "bad.json"
bad.write_text("{broken")
with patch("timmy.stack_manifest._MANIFEST_PATH", bad):
result = query_stack()
_reset_cache()
assert "invalid JSON" in result
def test_empty_manifest(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path, {"version": "1.0.0", "categories": []})
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack()
_reset_cache()
assert "empty" in result.lower()
# ---------------------------------------------------------------------------
# Output format
# ---------------------------------------------------------------------------
class TestOutputFormat:
"""Verify output formatting."""
def test_includes_install_command(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="Ollama")
_reset_cache()
assert "Install:" in result
assert "curl -fsSL" in result
def test_includes_license(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="Ollama")
_reset_cache()
assert "License: MIT" in result
def test_experimental_status_badge(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="exo-experimental")
_reset_cache()
assert "[EXPERIMENTAL]" in result
def test_includes_role(self, tmp_path):
from timmy.stack_manifest import _reset_cache, query_stack
_reset_cache()
path = _write_manifest(tmp_path)
with patch("timmy.stack_manifest._MANIFEST_PATH", path):
result = query_stack(tool="Agno")
_reset_cache()
assert "Role:" in result
assert "Core agent framework" in result

@@ -0,0 +1,617 @@
"""Unit tests for timmy/tools/system_tools.py.
Covers: _safe_eval, calculator, consult_grok, web_fetch,
create_aider_tool (AiderTool), create_code_tools,
create_security_tools, create_devops_tools.
"""
from __future__ import annotations
import ast
import math
import subprocess
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from timmy.tools.system_tools import (
_safe_eval,
calculator,
consult_grok,
create_aider_tool,
web_fetch,
)
pytestmark = pytest.mark.unit
# ── _safe_eval ────────────────────────────────────────────────────────────────
def _parse_eval(expr: str):
allowed = {k: getattr(math, k) for k in dir(math) if not k.startswith("_")}
allowed["math"] = math
allowed["abs"] = abs
allowed["round"] = round
allowed["min"] = min
allowed["max"] = max
tree = ast.parse(expr, mode="eval")
return _safe_eval(tree, allowed)
class TestSafeEval:
@pytest.mark.unit
def test_integer_constant(self):
assert _parse_eval("42") == 42
@pytest.mark.unit
def test_float_constant(self):
assert _parse_eval("3.14") == pytest.approx(3.14)
@pytest.mark.unit
def test_addition(self):
assert _parse_eval("1 + 2") == 3
@pytest.mark.unit
def test_subtraction(self):
assert _parse_eval("10 - 4") == 6
@pytest.mark.unit
def test_multiplication(self):
assert _parse_eval("3 * 7") == 21
@pytest.mark.unit
def test_division(self):
assert _parse_eval("10 / 4") == 2.5
@pytest.mark.unit
def test_floor_division(self):
assert _parse_eval("10 // 3") == 3
@pytest.mark.unit
def test_modulo(self):
assert _parse_eval("10 % 3") == 1
@pytest.mark.unit
def test_power(self):
assert _parse_eval("2 ** 8") == 256
@pytest.mark.unit
def test_unary_minus(self):
assert _parse_eval("-5") == -5
@pytest.mark.unit
def test_unary_plus(self):
assert _parse_eval("+5") == 5
@pytest.mark.unit
def test_math_attribute(self):
assert _parse_eval("math.pi") == pytest.approx(math.pi)
@pytest.mark.unit
def test_math_function_call(self):
assert _parse_eval("math.sqrt(16)") == pytest.approx(4.0)
@pytest.mark.unit
def test_allowed_name_abs(self):
assert _parse_eval("abs(-10)") == 10
@pytest.mark.unit
def test_allowed_name_round(self):
assert _parse_eval("round(3.7)") == 4
@pytest.mark.unit
def test_allowed_name_min(self):
assert _parse_eval("min(5, 2, 8)") == 2
@pytest.mark.unit
def test_allowed_name_max(self):
assert _parse_eval("max(5, 2, 8)") == 8
@pytest.mark.unit
def test_string_constant_rejected(self):
with pytest.raises(ValueError, match="Unsupported constant"):
_parse_eval("'hello'")
@pytest.mark.unit
def test_unknown_name_rejected(self):
with pytest.raises(ValueError, match="Unknown name"):
_parse_eval("xyz")
@pytest.mark.unit
def test_unsupported_binary_op(self):
with pytest.raises(ValueError, match="Unsupported"):
_parse_eval("3 & 5")
@pytest.mark.unit
def test_unsupported_unary_op(self):
with pytest.raises(ValueError, match="Unsupported"):
_parse_eval("~5")
@pytest.mark.unit
def test_attribute_on_non_math_rejected(self):
with pytest.raises(ValueError, match="Attribute access not allowed"):
_parse_eval("abs.__class__")
@pytest.mark.unit
def test_invalid_math_attr_rejected(self):
with pytest.raises(ValueError, match="Attribute access not allowed"):
_parse_eval("math.__builtins__")
@pytest.mark.unit
def test_unsupported_syntax_subscript(self):
with pytest.raises(ValueError, match="Unsupported syntax"):
_parse_eval("[1, 2][0]")
@pytest.mark.unit
def test_expression_wrapper(self):
"""ast.Expression node is unwrapped correctly."""
allowed = {"abs": abs}
tree = ast.parse("abs(-1)", mode="eval")
assert isinstance(tree, ast.Expression)
assert _safe_eval(tree, allowed) == 1
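# The rejection cases above imply a recursive whitelist walk over the AST.
# A minimal sketch of that shape (hypothetical helper, not the real
# _safe_eval): admit only numeric constants, whitelisted names, arithmetic
# operators, public math attributes, and calls built from those parts.
def _safe_eval_sketch(node: ast.AST, allowed: dict):
    if isinstance(node, ast.Expression):  # unwrap ast.parse(mode="eval")
        return _safe_eval_sketch(node.body, allowed)
    if isinstance(node, ast.Constant):
        if isinstance(node.value, (int, float)):
            return node.value
        raise ValueError(f"Unsupported constant: {node.value!r}")
    if isinstance(node, ast.Name):
        if node.id in allowed:
            return allowed[node.id]
        raise ValueError(f"Unknown name: {node.id}")
    if isinstance(node, ast.BinOp):
        ops = {
            ast.Add: lambda a, b: a + b, ast.Sub: lambda a, b: a - b,
            ast.Mult: lambda a, b: a * b, ast.Div: lambda a, b: a / b,
            ast.FloorDiv: lambda a, b: a // b, ast.Mod: lambda a, b: a % b,
            ast.Pow: lambda a, b: a ** b,
        }
        op = ops.get(type(node.op))
        if op is None:
            raise ValueError("Unsupported binary operator")
        return op(_safe_eval_sketch(node.left, allowed),
                  _safe_eval_sketch(node.right, allowed))
    if isinstance(node, ast.UnaryOp):
        if isinstance(node.op, ast.USub):
            return -_safe_eval_sketch(node.operand, allowed)
        if isinstance(node.op, ast.UAdd):
            return +_safe_eval_sketch(node.operand, allowed)
        raise ValueError("Unsupported unary operator")
    if isinstance(node, ast.Attribute):
        if (isinstance(node.value, ast.Name) and node.value.id == "math"
                and not node.attr.startswith("_")):
            return getattr(math, node.attr)
        raise ValueError("Attribute access not allowed")
    if isinstance(node, ast.Call):
        func = _safe_eval_sketch(node.func, allowed)
        return func(*(_safe_eval_sketch(a, allowed) for a in node.args))
    raise ValueError("Unsupported syntax")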
# ── calculator ────────────────────────────────────────────────────────────────
class TestCalculator:
@pytest.mark.unit
def test_basic_addition(self):
assert calculator("2 + 3") == "5"
@pytest.mark.unit
def test_multiplication(self):
assert calculator("6 * 7") == "42"
@pytest.mark.unit
def test_math_function(self):
assert calculator("math.sqrt(9)") == "3.0"
@pytest.mark.unit
def test_exponent(self):
assert calculator("2**10") == "1024"
@pytest.mark.unit
def test_error_on_syntax(self):
result = calculator("2 +")
assert "Error" in result
@pytest.mark.unit
def test_error_on_empty(self):
result = calculator("")
assert "Error" in result
@pytest.mark.unit
def test_error_on_division_by_zero(self):
result = calculator("1 / 0")
assert "Error" in result
@pytest.mark.unit
def test_error_message_contains_expression(self):
result = calculator("bad expr!!!")
assert "bad expr!!!" in result
@pytest.mark.unit
def test_injection_import(self):
result = calculator("__import__('os').system('echo hi')")
assert "Error" in result
@pytest.mark.unit
def test_injection_builtins(self):
result = calculator("__builtins__")
assert "Error" in result
@pytest.mark.unit
def test_string_literal_rejected(self):
result = calculator("'hello'")
assert "Error" in result
# ── consult_grok ──────────────────────────────────────────────────────────────
class TestConsultGrok:
@pytest.mark.unit
def test_grok_not_available(self):
with patch("timmy.backends.grok_available", return_value=False):
result = consult_grok("test query")
assert "not available" in result.lower()
@pytest.mark.unit
def test_grok_free_mode(self):
mock_backend = MagicMock()
mock_backend.run.return_value = MagicMock(content="Answer text")
mock_settings = MagicMock()
mock_settings.grok_free = True
with patch("timmy.backends.grok_available", return_value=True), \
patch("timmy.backends.get_grok_backend", return_value=mock_backend), \
patch("config.settings", mock_settings):
result = consult_grok("What is 2+2?")
assert result == "Answer text"
mock_backend.run.assert_called_once_with("What is 2+2?")
@pytest.mark.unit
def test_grok_spark_logging_failure_is_silent(self):
"""Spark logging failure should not crash consult_grok."""
mock_backend = MagicMock()
mock_backend.run.return_value = MagicMock(content="ok")
mock_settings = MagicMock()
mock_settings.grok_free = True
with patch("timmy.backends.grok_available", return_value=True), \
patch("timmy.backends.get_grok_backend", return_value=mock_backend), \
patch("config.settings", mock_settings), \
patch.dict("sys.modules", {"spark.engine": None}):
result = consult_grok("hello")
assert result == "ok"
@pytest.mark.unit
def test_grok_paid_mode_lightning_failure(self):
"""When Lightning invoice creation fails, return an error message."""
mock_backend = MagicMock()
mock_settings = MagicMock()
mock_settings.grok_free = False
mock_settings.grok_max_sats_per_query = 10
mock_settings.grok_sats_hard_cap = 100
mock_lightning = MagicMock()
mock_ln_backend = MagicMock()
mock_ln_backend.create_invoice.side_effect = OSError("LN down")
mock_lightning.get_backend.return_value = mock_ln_backend
with patch("timmy.backends.grok_available", return_value=True), \
patch("timmy.backends.get_grok_backend", return_value=mock_backend), \
patch("config.settings", mock_settings), \
patch.dict("sys.modules", {"lightning.factory": mock_lightning}):
result = consult_grok("expensive query")
assert "Error" in result
# ── web_fetch ─────────────────────────────────────────────────────────────────
class TestWebFetch:
@pytest.mark.unit
def test_invalid_scheme_ftp(self):
result = web_fetch("ftp://example.com")
assert "Error: invalid URL" in result
@pytest.mark.unit
def test_empty_url(self):
result = web_fetch("")
assert "Error: invalid URL" in result
@pytest.mark.unit
def test_no_scheme(self):
result = web_fetch("example.com/page")
assert "Error: invalid URL" in result
@pytest.mark.unit
def test_missing_requests_package(self):
with patch.dict("sys.modules", {"requests": None}):
result = web_fetch("https://example.com")
assert "requests" in result and "not installed" in result
@pytest.mark.unit
def test_missing_trafilatura_package(self):
mock_requests = MagicMock()
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": None}):
result = web_fetch("https://example.com")
assert "trafilatura" in result and "not installed" in result
@pytest.mark.unit
def test_extraction_returns_none(self):
mock_requests = MagicMock()
mock_trafilatura = MagicMock()
mock_resp = MagicMock()
mock_resp.text = "<html></html>"
mock_requests.get.return_value = mock_resp
mock_requests.exceptions = _make_request_exceptions()
mock_trafilatura.extract.return_value = None
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com")
assert "Error: could not extract" in result
@pytest.mark.unit
def test_truncation_applied(self):
mock_requests = MagicMock()
mock_trafilatura = MagicMock()
long_text = "x" * 10000
mock_resp = MagicMock()
mock_resp.text = "<html><body>" + long_text + "</body></html>"
mock_requests.get.return_value = mock_resp
mock_requests.exceptions = _make_request_exceptions()
mock_trafilatura.extract.return_value = long_text
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com", max_tokens=100)
assert "[…truncated" in result
assert len(result) < 600
@pytest.mark.unit
def test_successful_fetch(self):
mock_requests = MagicMock()
mock_trafilatura = MagicMock()
mock_resp = MagicMock()
mock_resp.text = "<html><body><p>Hello</p></body></html>"
mock_requests.get.return_value = mock_resp
mock_requests.exceptions = _make_request_exceptions()
mock_trafilatura.extract.return_value = "Hello"
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com")
assert result == "Hello"
@pytest.mark.unit
def test_timeout_error(self):
exc_mod = _make_request_exceptions()
mock_requests = MagicMock()
mock_requests.exceptions = exc_mod
mock_requests.get.side_effect = exc_mod.Timeout("timed out")
mock_trafilatura = MagicMock()
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com")
assert "timed out" in result
@pytest.mark.unit
def test_http_error_404(self):
exc_mod = _make_request_exceptions()
mock_requests = MagicMock()
mock_requests.exceptions = exc_mod
mock_response = MagicMock()
mock_response.status_code = 404
mock_requests.get.return_value.raise_for_status.side_effect = exc_mod.HTTPError(
response=mock_response
)
mock_trafilatura = MagicMock()
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com/nope")
assert "404" in result
@pytest.mark.unit
def test_request_exception(self):
exc_mod = _make_request_exceptions()
mock_requests = MagicMock()
mock_requests.exceptions = exc_mod
mock_requests.get.side_effect = exc_mod.RequestException("connection refused")
mock_trafilatura = MagicMock()
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("https://example.com")
assert "Error" in result
@pytest.mark.unit
def test_http_url_accepted(self):
"""http:// URLs should pass the scheme check."""
mock_requests = MagicMock()
mock_trafilatura = MagicMock()
mock_resp = MagicMock()
mock_resp.text = "<html><body><p>content</p></body></html>"
mock_requests.get.return_value = mock_resp
mock_requests.exceptions = _make_request_exceptions()
mock_trafilatura.extract.return_value = "content"
with patch.dict("sys.modules", {"requests": mock_requests, "trafilatura": mock_trafilatura}):
result = web_fetch("http://example.com")
assert result == "content"
# ── create_aider_tool / AiderTool ─────────────────────────────────────────────
class TestAiderTool:
@pytest.mark.unit
def test_factory_returns_tool(self, tmp_path):
tool = create_aider_tool(tmp_path)
assert hasattr(tool, "run_aider")
@pytest.mark.unit
def test_base_dir_set(self, tmp_path):
tool = create_aider_tool(tmp_path)
assert tool.base_dir == tmp_path
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_success(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="code generated")
tool = create_aider_tool(tmp_path)
result = tool.run_aider("add a function")
assert result == "code generated"
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_success_empty_stdout(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="")
tool = create_aider_tool(tmp_path)
result = tool.run_aider("do something")
assert "successfully" in result.lower()
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_failure(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=1, stderr="fatal error")
tool = create_aider_tool(tmp_path)
result = tool.run_aider("bad prompt")
assert "error" in result.lower()
assert "fatal error" in result
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_not_installed(self, mock_run, tmp_path):
mock_run.side_effect = FileNotFoundError
tool = create_aider_tool(tmp_path)
result = tool.run_aider("task")
assert "not installed" in result.lower()
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_timeout(self, mock_run, tmp_path):
mock_run.side_effect = subprocess.TimeoutExpired(cmd="aider", timeout=120)
tool = create_aider_tool(tmp_path)
result = tool.run_aider("long task")
assert "timed out" in result.lower()
@pytest.mark.unit
@patch("subprocess.run")
def test_run_aider_os_error(self, mock_run, tmp_path):
mock_run.side_effect = OSError("permission denied")
tool = create_aider_tool(tmp_path)
result = tool.run_aider("task")
assert "error" in result.lower()
@pytest.mark.unit
@patch("subprocess.run")
def test_custom_model_passed_to_subprocess(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="ok")
tool = create_aider_tool(tmp_path)
tool.run_aider("task", model="mistral:7b")
call_args = mock_run.call_args[0][0]
assert "ollama/mistral:7b" in call_args
@pytest.mark.unit
@patch("subprocess.run")
def test_default_model_is_passed(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="ok")
tool = create_aider_tool(tmp_path)
tool.run_aider("task")
call_args = mock_run.call_args[0][0]
assert "--model" in call_args
@pytest.mark.unit
@patch("subprocess.run")
def test_no_git_flag_present(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="ok")
tool = create_aider_tool(tmp_path)
tool.run_aider("task")
call_args = mock_run.call_args[0][0]
assert "--no-git" in call_args
@pytest.mark.unit
@patch("subprocess.run")
def test_cwd_is_base_dir(self, mock_run, tmp_path):
mock_run.return_value = MagicMock(returncode=0, stdout="ok")
tool = create_aider_tool(tmp_path)
tool.run_aider("task")
assert mock_run.call_args[1]["cwd"] == str(tmp_path)
# ── create_code_tools / create_security_tools / create_devops_tools ───────────
class TestToolkitFactories:
@pytest.mark.unit
def test_create_code_tools_requires_agno(self):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno is available — ImportError path not testable")
from timmy.tools.system_tools import create_code_tools
with pytest.raises(ImportError):
create_code_tools()
@pytest.mark.unit
def test_create_security_tools_requires_agno(self):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno is available — ImportError path not testable")
from timmy.tools.system_tools import create_security_tools
with pytest.raises(ImportError):
create_security_tools()
@pytest.mark.unit
def test_create_devops_tools_requires_agno(self):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno is available — ImportError path not testable")
from timmy.tools.system_tools import create_devops_tools
with pytest.raises(ImportError):
create_devops_tools()
@pytest.mark.unit
def test_create_code_tools_with_agno(self, tmp_path):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if not _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno not available")
from timmy.tools.system_tools import create_code_tools
mock_settings = MagicMock()
mock_settings.repo_root = str(tmp_path)
with patch("config.settings", mock_settings):
toolkit = create_code_tools(base_dir=tmp_path)
assert toolkit is not None
assert toolkit.name == "code"
@pytest.mark.unit
def test_create_security_tools_with_agno(self, tmp_path):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if not _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno not available")
from timmy.tools.system_tools import create_security_tools
mock_settings = MagicMock()
mock_settings.repo_root = str(tmp_path)
with patch("config.settings", mock_settings):
toolkit = create_security_tools(base_dir=tmp_path)
assert toolkit is not None
assert toolkit.name == "security"
@pytest.mark.unit
def test_create_devops_tools_with_agno(self, tmp_path):
from timmy.tools.system_tools import _AGNO_TOOLS_AVAILABLE
if not _AGNO_TOOLS_AVAILABLE:
pytest.skip("Agno not available")
from timmy.tools.system_tools import create_devops_tools
mock_settings = MagicMock()
mock_settings.repo_root = str(tmp_path)
with patch("config.settings", mock_settings):
toolkit = create_devops_tools(base_dir=tmp_path)
assert toolkit is not None
assert toolkit.name == "devops"
# ── Helpers ───────────────────────────────────────────────────────────────────
def _make_request_exceptions():
"""Create a mock requests.exceptions module with real exception classes."""
class Timeout(Exception):
pass
class HTTPError(Exception):
def __init__(self, *args, response=None, **kwargs):
super().__init__(*args, **kwargs)
self.response = response
class RequestException(Exception):
pass
mod = MagicMock()
mod.Timeout = Timeout
mod.HTTPError = HTTPError
mod.RequestException = RequestException
return mod
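
Taken together, the web_fetch cases fix an order of operations: scheme check first, lazy imports next, then fetch, extract, and truncate. A minimal sketch of that flow (hypothetical function, default, and messages; the real implementation lives in timmy/tools/system_tools.py):

def web_fetch_sketch(url: str, max_tokens: int = 2000) -> str:
    if not url.startswith(("http://", "https://")):
        return f"Error: invalid URL: {url}"
    try:
        # Lazy imports so tests can stub or remove them via sys.modules.
        import requests
        import trafilatura
    except ImportError as exc:
        return f"Error: {exc.name} not installed"
    try:
        resp = requests.get(url, timeout=30)
        resp.raise_for_status()
    except requests.exceptions.Timeout:
        return "Error: request timed out"
    except requests.exceptions.HTTPError as exc:
        return f"Error: HTTP {exc.response.status_code}"
    except requests.exceptions.RequestException as exc:
        return f"Error: {exc}"
    text = trafilatura.extract(resp.text)
    if text is None:
        return "Error: could not extract content"
    limit = max_tokens * 4  # rough chars-per-token heuristic
    if len(text) > limit:
        text = text[:limit] + " […truncated]"
    return text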

File diff suppressed because it is too large.