feat: add run_self_tests() tool for self-verification (#65)
Timmy can now run his own test suite via the run_self_tests() tool. Supports 'fast' (unit only), 'full', or specific path scopes. Returns structured results with pass/fail counts. Sovereign self-verification — a fundamental capability.
This commit is contained in:
@@ -111,6 +111,7 @@ agents:
|
|||||||
- memory_search
|
- memory_search
|
||||||
- memory_write
|
- memory_write
|
||||||
- system_status
|
- system_status
|
||||||
|
- self_test
|
||||||
- shell
|
- shell
|
||||||
prompt: |
|
prompt: |
|
||||||
You are Timmy, a sovereign local AI orchestrator.
|
You are Timmy, a sovereign local AI orchestrator.
|
||||||
|
|||||||
@@ -579,11 +579,17 @@ def create_full_toolkit(base_dir: str | Path | None = None):
|
|||||||
|
|
||||||
# System introspection - query runtime environment (sovereign self-knowledge)
|
# System introspection - query runtime environment (sovereign self-knowledge)
|
||||||
try:
|
try:
|
||||||
from timmy.tools_intro import check_ollama_health, get_memory_status, get_system_info
|
from timmy.tools_intro import (
|
||||||
|
check_ollama_health,
|
||||||
|
get_memory_status,
|
||||||
|
get_system_info,
|
||||||
|
run_self_tests,
|
||||||
|
)
|
||||||
|
|
||||||
toolkit.register(get_system_info, name="get_system_info")
|
toolkit.register(get_system_info, name="get_system_info")
|
||||||
toolkit.register(check_ollama_health, name="check_ollama_health")
|
toolkit.register(check_ollama_health, name="check_ollama_health")
|
||||||
toolkit.register(get_memory_status, name="get_memory_status")
|
toolkit.register(get_memory_status, name="get_memory_status")
|
||||||
|
toolkit.register(run_self_tests, name="run_self_tests")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
|
logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
|
||||||
logger.debug("Introspection tools not available")
|
logger.debug("Introspection tools not available")
|
||||||
|
|||||||
@@ -321,3 +321,78 @@ def get_live_system_status() -> dict[str, Any]:
|
|||||||
|
|
||||||
result["timestamp"] = datetime.now(UTC).isoformat()
|
result["timestamp"] = datetime.now(UTC).isoformat()
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def run_self_tests(scope: str = "fast", _repo_root: str | None = None) -> dict[str, Any]:
|
||||||
|
"""Run Timmy's own test suite and report results.
|
||||||
|
|
||||||
|
A sovereign agent verifies his own integrity. This runs pytest
|
||||||
|
on the codebase and returns a structured summary.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
scope: Test scope — "fast" (unit tests only, ~30s timeout),
|
||||||
|
"full" (all tests), or a specific path like "tests/timmy/"
|
||||||
|
_repo_root: Optional repo root for testing (overrides settings)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with passed, failed, errors, total counts and summary text.
|
||||||
|
"""
|
||||||
|
import subprocess
|
||||||
|
|
||||||
|
from config import settings
|
||||||
|
|
||||||
|
repo = _repo_root if _repo_root else settings.repo_root
|
||||||
|
venv_python = Path(repo) / ".venv" / "bin" / "python"
|
||||||
|
if not venv_python.exists():
|
||||||
|
return {"success": False, "error": f"No venv found at {venv_python}"}
|
||||||
|
|
||||||
|
cmd = [str(venv_python), "-m", "pytest", "-x", "-q", "--tb=short", "--timeout=30"]
|
||||||
|
|
||||||
|
if scope == "fast":
|
||||||
|
# Unit tests only — skip functional/e2e/integration
|
||||||
|
cmd.extend(
|
||||||
|
[
|
||||||
|
"--ignore=tests/functional",
|
||||||
|
"--ignore=tests/e2e",
|
||||||
|
"--ignore=tests/integrations",
|
||||||
|
"tests/",
|
||||||
|
]
|
||||||
|
)
|
||||||
|
elif scope == "full":
|
||||||
|
cmd.append("tests/")
|
||||||
|
else:
|
||||||
|
# Specific path
|
||||||
|
cmd.append(scope)
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120, cwd=repo)
|
||||||
|
output = result.stdout + result.stderr
|
||||||
|
|
||||||
|
# Parse pytest output for counts
|
||||||
|
passed = failed = errors = 0
|
||||||
|
for line in output.splitlines():
|
||||||
|
if "passed" in line or "failed" in line or "error" in line:
|
||||||
|
import re
|
||||||
|
|
||||||
|
nums = re.findall(r"(\d+) (passed|failed|error)", line)
|
||||||
|
for count, kind in nums:
|
||||||
|
if kind == "passed":
|
||||||
|
passed = int(count)
|
||||||
|
elif kind == "failed":
|
||||||
|
failed = int(count)
|
||||||
|
elif kind == "error":
|
||||||
|
errors = int(count)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": result.returncode == 0,
|
||||||
|
"passed": passed,
|
||||||
|
"failed": failed,
|
||||||
|
"errors": errors,
|
||||||
|
"total": passed + failed + errors,
|
||||||
|
"return_code": result.returncode,
|
||||||
|
"summary": output[-2000:] if len(output) > 2000 else output,
|
||||||
|
}
|
||||||
|
except subprocess.TimeoutExpired:
|
||||||
|
return {"success": False, "error": "Test run timed out (120s limit)"}
|
||||||
|
except Exception as exc:
|
||||||
|
return {"success": False, "error": str(exc)}
|
||||||
|
|||||||
@@ -158,3 +158,101 @@ class TestGetOllamaModelExactMatch:
|
|||||||
result = _get_ollama_model()
|
result = _get_ollama_model()
|
||||||
|
|
||||||
assert result == "qwen3:30b"
|
assert result == "qwen3:30b"
|
||||||
|
|
||||||
|
|
||||||
|
class TestRunSelfTests:
    """Tests for run_self_tests() — Timmy's self-verification tool."""

    @staticmethod
    def _install_fake_venv(root):
        """Create a dummy venv python under *root* so the existence check passes."""
        interpreter = root / ".venv" / "bin" / "python"
        interpreter.parent.mkdir(parents=True)
        interpreter.write_text("#!/bin/sh\necho mock")

    @staticmethod
    def _recording_run(recorded, stdout_text):
        """Build a subprocess.run replacement that records each command."""
        import subprocess

        def fake_run(*args, **kwargs):
            recorded.append(args[0] if args else kwargs.get("cmd"))
            return subprocess.CompletedProcess(
                args=args[0] if args else [], returncode=0, stdout=stdout_text, stderr=""
            )

        return fake_run

    def test_returns_dict_with_expected_keys(self):
        """run_self_tests should return structured test results."""
        from timmy.tools_intro import run_self_tests

        result = run_self_tests(scope="tests/timmy/test_introspection.py")
        assert isinstance(result, dict)
        assert "success" in result
        # Count keys are only guaranteed when the run actually happened.
        if result["success"] or "passed" in result:
            assert "passed" in result
            assert "failed" in result
            assert "total" in result

    def test_fast_scope_skips_integration(self, monkeypatch, tmp_path):
        """Fast scope should exclude functional/e2e/integration dirs."""
        import subprocess

        recorded = []
        monkeypatch.setattr(
            subprocess, "run", self._recording_run(recorded, "1 passed in 0.5s")
        )
        self._install_fake_venv(tmp_path)

        from timmy.tools_intro import run_self_tests

        run_self_tests(scope="fast", _repo_root=str(tmp_path))
        assert len(recorded) == 1
        command = recorded[0]
        assert "--ignore=tests/functional" in command
        assert "--ignore=tests/e2e" in command

    def test_specific_path_scope(self, monkeypatch, tmp_path):
        """Specific path scope passes path directly to pytest."""
        import subprocess

        recorded = []
        monkeypatch.setattr(
            subprocess, "run", self._recording_run(recorded, "5 passed in 1.0s")
        )
        self._install_fake_venv(tmp_path)

        from timmy.tools_intro import run_self_tests

        run_self_tests(scope="tests/timmy/", _repo_root=str(tmp_path))
        assert len(recorded) == 1
        assert "tests/timmy/" in recorded[0]

    def test_missing_venv_returns_error(self, monkeypatch, tmp_path):
        """Should handle missing venv gracefully."""
        from timmy.tools_intro import run_self_tests

        outcome = run_self_tests(_repo_root=str(tmp_path))
        assert outcome["success"] is False
        assert "venv" in outcome.get("error", "").lower()

    def test_timeout_returns_error(self, monkeypatch, tmp_path):
        """Should handle subprocess timeout gracefully."""
        import subprocess

        def raise_timeout(*args, **kwargs):
            raise subprocess.TimeoutExpired(cmd="pytest", timeout=120)

        monkeypatch.setattr(subprocess, "run", raise_timeout)
        self._install_fake_venv(tmp_path)

        from timmy.tools_intro import run_self_tests

        outcome = run_self_tests(_repo_root=str(tmp_path))
        assert outcome["success"] is False
        assert "timed out" in outcome.get("error", "").lower()
|
||||||
|
|||||||
Reference in New Issue
Block a user