From 2fb104528ff89d42264c77e082f2a3a801a2f823 Mon Sep 17 00:00:00 2001
From: Kimi Agent
Date: Sat, 14 Mar 2026 20:28:24 -0400
Subject: [PATCH] feat: add run_self_tests() tool for self-verification (#65)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Timmy can now run his own test suite via the run_self_tests() tool.
Supports 'fast' (unit only), 'full', or specific path scopes.
Returns structured results with pass/fail counts taken from pytest's
summary line. Scopes that look like command-line options are rejected
before pytest is invoked.

Sovereign self-verification — a fundamental capability.
---
 config/agents.yaml                |   1 +
 src/timmy/tools.py                |   8 ++-
 src/timmy/tools_intro/__init__.py |  83 ++++++++++++++++++++++++++
 tests/timmy/test_introspection.py | 117 +++++++++++++++++++++++++++++++++++++
 4 files changed, 208 insertions(+), 1 deletion(-)

diff --git a/config/agents.yaml b/config/agents.yaml
index 1faca4d8..3723447b 100644
--- a/config/agents.yaml
+++ b/config/agents.yaml
@@ -111,6 +111,7 @@ agents:
       - memory_search
       - memory_write
       - system_status
+      - self_test
       - shell
     prompt: |
       You are Timmy, a sovereign local AI orchestrator.
diff --git a/src/timmy/tools.py b/src/timmy/tools.py
index e9d53caa..5841f736 100644
--- a/src/timmy/tools.py
+++ b/src/timmy/tools.py
@@ -579,11 +579,17 @@ def create_full_toolkit(base_dir: str | Path | None = None):
 
     # System introspection - query runtime environment (sovereign self-knowledge)
     try:
-        from timmy.tools_intro import check_ollama_health, get_memory_status, get_system_info
+        from timmy.tools_intro import (
+            check_ollama_health,
+            get_memory_status,
+            get_system_info,
+            run_self_tests,
+        )
 
         toolkit.register(get_system_info, name="get_system_info")
         toolkit.register(check_ollama_health, name="check_ollama_health")
         toolkit.register(get_memory_status, name="get_memory_status")
+        toolkit.register(run_self_tests, name="run_self_tests")
     except Exception as exc:
         logger.warning("Tool execution failed (Introspection tools registration): %s", exc)
         logger.debug("Introspection tools not available")
diff --git a/src/timmy/tools_intro/__init__.py b/src/timmy/tools_intro/__init__.py
index 9f75becc..b0b6f01b 100644
--- a/src/timmy/tools_intro/__init__.py
+++ b/src/timmy/tools_intro/__init__.py
@@ -321,3 +321,86 @@ def get_live_system_status() -> dict[str, Any]:
 
     result["timestamp"] = datetime.now(UTC).isoformat()
     return result
+
+
+def run_self_tests(scope: str = "fast", _repo_root: str | None = None) -> dict[str, Any]:
+    """Run Timmy's own test suite and report results.
+
+    A sovereign agent verifies his own integrity. This runs pytest
+    on the codebase and returns a structured summary.
+
+    Pytest runs with ``-x``, so the run stops at the first failure and the
+    counts only cover tests executed up to that point. Every scope uses a
+    30s per-test timeout and a 120s limit for the whole run.
+
+    Args:
+        scope: Test scope — "fast" (unit tests only; skips the functional,
+            e2e and integrations suites), "full" (all tests), or a specific
+            path like "tests/timmy/". Values that look like command-line
+            options (leading "-") are rejected.
+        _repo_root: Optional repo root for testing (overrides settings)
+
+    Returns:
+        Dict with success, passed, failed, errors, total, return_code and
+        summary (tail of the pytest output). When the suite cannot be run,
+        a dict with success=False and an error message instead.
+    """
+    import re
+    import subprocess
+
+    from config import settings
+
+    repo = _repo_root if _repo_root else settings.repo_root
+    venv_python = Path(repo) / ".venv" / "bin" / "python"
+    if not venv_python.exists():
+        return {"success": False, "error": f"No venv found at {venv_python}"}
+
+    cmd = [str(venv_python), "-m", "pytest", "-x", "-q", "--tb=short", "--timeout=30"]
+
+    if scope == "fast":
+        # Unit tests only — skip functional/e2e/integration
+        cmd.extend(
+            [
+                "--ignore=tests/functional",
+                "--ignore=tests/e2e",
+                "--ignore=tests/integrations",
+                "tests/",
+            ]
+        )
+    elif scope == "full":
+        cmd.append("tests/")
+    elif not isinstance(scope, str) or not scope.strip() or scope.lstrip().startswith("-"):
+        # The scope is forwarded verbatim to pytest: refuse anything that
+        # would be parsed as a command-line option rather than a path.
+        return {"success": False, "error": f"Invalid test scope: {scope!r}"}
+    else:
+        # Specific path
+        cmd.append(scope)
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120, cwd=repo)
+        output = result.stdout + result.stderr
+
+        # Take the counts from the last stdout line that reports any: that
+        # is pytest's summary. Earlier lines (tracebacks, captured output)
+        # may contain the same words and must not leak into the totals.
+        counts = {"passed": 0, "failed": 0, "error": 0}
+        for line in reversed(result.stdout.splitlines()):
+            found = re.findall(r"(\d+) (passed|failed|error)", line)
+            if found:
+                counts.update((kind, int(count)) for count, kind in found)
+                break
+
+        return {
+            "success": result.returncode == 0,
+            "passed": counts["passed"],
+            "failed": counts["failed"],
+            "errors": counts["error"],
+            "total": sum(counts.values()),
+            "return_code": result.returncode,
+            "summary": output[-2000:],
+        }
+    except subprocess.TimeoutExpired:
+        return {"success": False, "error": "Test run timed out (120s limit)"}
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
diff --git a/tests/timmy/test_introspection.py b/tests/timmy/test_introspection.py
index 6b2d733b..c1439472 100644
--- a/tests/timmy/test_introspection.py
+++ b/tests/timmy/test_introspection.py
@@ -158,3 +158,120 @@ class TestGetOllamaModelExactMatch:
             result = _get_ollama_model()
 
         assert result == "qwen3:30b"
+
+
+class TestRunSelfTests:
+    """Tests for run_self_tests() — Timmy's self-verification tool."""
+
+    @staticmethod
+    def _make_fake_venv(repo_root):
+        """Create a placeholder interpreter so the venv check passes."""
+        venv_python = repo_root / ".venv" / "bin" / "python"
+        venv_python.parent.mkdir(parents=True)
+        venv_python.write_text("#!/bin/sh\necho mock")
+
+    @staticmethod
+    def _fake_pytest(monkeypatch, stdout, returncode=0):
+        """Replace subprocess.run with a stub and return the captured commands."""
+        import subprocess
+
+        calls = []
+
+        def capture_run(*args, **kwargs):
+            cmd = args[0] if args else kwargs.get("args")
+            calls.append(cmd)
+            return subprocess.CompletedProcess(
+                args=cmd, returncode=returncode, stdout=stdout, stderr=""
+            )
+
+        monkeypatch.setattr(subprocess, "run", capture_run)
+        return calls
+
+    def test_returns_dict_with_expected_keys(self, monkeypatch, tmp_path):
+        """run_self_tests should return structured test results."""
+        from timmy.tools_intro import run_self_tests
+
+        # pytest is stubbed: running this very file from inside one of its
+        # own tests would spawn the suite recursively.
+        self._fake_pytest(monkeypatch, "1 failed, 4 passed in 0.5s", returncode=1)
+        self._make_fake_venv(tmp_path)
+
+        result = run_self_tests(scope="tests/timmy/", _repo_root=str(tmp_path))
+        assert result["success"] is False
+        assert result["passed"] == 4
+        assert result["failed"] == 1
+        assert result["errors"] == 0
+        assert result["total"] == 5
+        assert result["return_code"] == 1
+
+    def test_counts_come_from_summary_line(self, monkeypatch, tmp_path):
+        """Count-like text before the summary must not leak into the totals."""
+        from timmy.tools_intro import run_self_tests
+
+        stdout = "E   assert 7 error entries == 0\n1 failed, 2 passed in 0.3s"
+        self._fake_pytest(monkeypatch, stdout, returncode=1)
+        self._make_fake_venv(tmp_path)
+
+        result = run_self_tests(scope="full", _repo_root=str(tmp_path))
+        assert (result["passed"], result["failed"], result["errors"]) == (2, 1, 0)
+
+    def test_fast_scope_skips_integration(self, monkeypatch, tmp_path):
+        """Fast scope should exclude functional/e2e/integration dirs."""
+        from timmy.tools_intro import run_self_tests
+
+        calls = self._fake_pytest(monkeypatch, "1 passed in 0.5s")
+        self._make_fake_venv(tmp_path)
+
+        run_self_tests(scope="fast", _repo_root=str(tmp_path))
+        assert len(calls) == 1
+        cmd = calls[0]
+        assert "--ignore=tests/functional" in cmd
+        assert "--ignore=tests/e2e" in cmd
+        assert "--ignore=tests/integrations" in cmd
+
+    def test_specific_path_scope(self, monkeypatch, tmp_path):
+        """Specific path scope passes path directly to pytest."""
+        from timmy.tools_intro import run_self_tests
+
+        calls = self._fake_pytest(monkeypatch, "5 passed in 1.0s")
+        self._make_fake_venv(tmp_path)
+
+        run_self_tests(scope="tests/timmy/", _repo_root=str(tmp_path))
+        assert len(calls) == 1
+        assert "tests/timmy/" in calls[0]
+
+    def test_option_like_scope_is_rejected(self, monkeypatch, tmp_path):
+        """A scope that pytest would parse as an option must never be run."""
+        from timmy.tools_intro import run_self_tests
+
+        calls = self._fake_pytest(monkeypatch, "1 passed in 0.5s")
+        self._make_fake_venv(tmp_path)
+
+        result = run_self_tests(scope="--rootdir=/", _repo_root=str(tmp_path))
+        assert result["success"] is False
+        assert "scope" in result.get("error", "").lower()
+        assert calls == []
+
+    def test_missing_venv_returns_error(self, tmp_path):
+        """Should handle missing venv gracefully."""
+        from timmy.tools_intro import run_self_tests
+
+        result = run_self_tests(_repo_root=str(tmp_path))
+        assert result["success"] is False
+        assert "venv" in result.get("error", "").lower()
+
+    def test_timeout_returns_error(self, monkeypatch, tmp_path):
+        """Should handle subprocess timeout gracefully."""
+        import subprocess
+
+        from timmy.tools_intro import run_self_tests
+
+        def timeout_run(*args, **kwargs):
+            raise subprocess.TimeoutExpired(cmd="pytest", timeout=120)
+
+        monkeypatch.setattr(subprocess, "run", timeout_run)
+        self._make_fake_venv(tmp_path)
+
+        result = run_self_tests(_repo_root=str(tmp_path))
+        assert result["success"] is False
+        assert "timed out" in result.get("error", "").lower()