feat: add run_self_tests() tool for self-verification (#65)

Timmy can now run his own test suite via the run_self_tests() tool. It supports three scopes: 'fast' (unit tests only), 'full' (all tests), or a specific test path. It returns structured results with passed, failed, and error counts plus a summary of the pytest output. Sovereign self-verification — a fundamental capability.
2026-03-14 20:28:24 -04:00
parent c164d1736f
commit 2fb104528f
4 changed files with 181 additions and 1 deletion
--- a/src/timmy/tools_intro/init.py
+++ b/src/timmy/tools_intro/init.py
@@ -321,3 +321,78 @@ def get_live_system_status() -> dict[str, Any]:

    result["timestamp"] = datetime.now(UTC).isoformat()
    return result
+
+
+def run_self_tests(scope: str = "fast", _repo_root: str | None = None) -> dict[str, Any]:
+    """Run Timmy's own test suite and report results.
+
+    A sovereign agent verifies his own integrity. This runs pytest
+    on the codebase and returns a structured summary.
+
+    Args:
+        scope: Test scope — "fast" (unit tests only, ~30s timeout),
+               "full" (all tests), or a specific path like "tests/timmy/"
+        _repo_root: Optional repo root for testing (overrides settings)
+
+    Returns:
+        Dict with passed, failed, errors, total counts and summary text.
+    """
+    import subprocess
+
+    from config import settings
+
+    repo = _repo_root if _repo_root else settings.repo_root
+    venv_python = Path(repo) / ".venv" / "bin" / "python"
+    if not venv_python.exists():
+        return {"success": False, "error": f"No venv found at {venv_python}"}
+
+    cmd = [str(venv_python), "-m", "pytest", "-x", "-q", "--tb=short", "--timeout=30"]
+
+    if scope == "fast":
+        # Unit tests only — skip functional/e2e/integration
+        cmd.extend(
+            [
+                "--ignore=tests/functional",
+                "--ignore=tests/e2e",
+                "--ignore=tests/integrations",
+                "tests/",
+            ]
+        )
+    elif scope == "full":
+        cmd.append("tests/")
+    else:
+        # Specific path
+        cmd.append(scope)
+
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120, cwd=repo)
+        output = result.stdout + result.stderr
+
+        # Parse pytest output for counts
+        passed = failed = errors = 0
+        for line in output.splitlines():
+            if "passed" in line or "failed" in line or "error" in line:
+                import re
+
+                nums = re.findall(r"(\d+) (passed|failed|error)", line)
+                for count, kind in nums:
+                    if kind == "passed":
+                        passed = int(count)
+                    elif kind == "failed":
+                        failed = int(count)
+                    elif kind == "error":
+                        errors = int(count)
+
+        return {
+            "success": result.returncode == 0,
+            "passed": passed,
+            "failed": failed,
+            "errors": errors,
+            "total": passed + failed + errors,
+            "return_code": result.returncode,
+            "summary": output[-2000:] if len(output) > 2000 else output,
+        }
+    except subprocess.TimeoutExpired:
+        return {"success": False, "error": "Test run timed out (120s limit)"}
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}