diff --git a/Dockerfile b/Dockerfile index f5efe33..d710328 100644 --- a/Dockerfile +++ b/Dockerfile @@ -69,7 +69,7 @@ ENV PYTHONDONTWRITEBYTECODE=1 EXPOSE 8000 # ── Healthcheck ────────────────────────────────────────────────────────────── -HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \ +HEALTHCHECK --interval=30s --timeout=5s --start-period=30s --retries=3 \ CMD curl -f http://localhost:8000/health || exit 1 # ── Default: run the dashboard ─────────────────────────────────────────────── diff --git a/docker-compose.yml b/docker-compose.yml index 99046c5..5514319 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -52,7 +52,7 @@ services: interval: 30s timeout: 5s retries: 3 - start_period: 10s + start_period: 30s # ── Timmy — sovereign AI agent (separate container) ─────────────────────── timmy: diff --git a/pyproject.toml b/pyproject.toml index 30d9360..fbf5584 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,11 @@ python-telegram-bot = { version = ">=21.0", optional = true } "discord.py" = { version = ">=2.3.0", optional = true } airllm = { version = ">=2.9.0", optional = true } pyttsx3 = { version = ">=2.90", optional = true } +pytest = { version = ">=8.0.0", optional = true } +pytest-asyncio = { version = ">=0.24.0", optional = true } +pytest-cov = { version = ">=5.0.0", optional = true } +pytest-timeout = { version = ">=2.3.0", optional = true } +selenium = { version = ">=4.20.0", optional = true } [tool.poetry.extras] swarm = ["redis"] @@ -56,6 +61,7 @@ telegram = ["python-telegram-bot"] discord = ["discord.py"] bigbrain = ["airllm"] voice = ["pyttsx3"] +dev = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-timeout", "selenium"] [tool.poetry.group.dev.dependencies] pytest = ">=8.0.0" diff --git a/src/dashboard/app.py b/src/dashboard/app.py index aa372a1..fd77c56 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -355,8 +355,12 @@ async def _start_chat_integrations_background() -> None: """Background task: start chat integrations without blocking startup.""" from integrations.telegram_bot.bot import telegram_bot from integrations.chat_bridge.vendors.discord import discord_bot - + from integrations.chat_bridge.registry import platform_registry + await asyncio.sleep(0.5) + + # Register Discord in the platform registry + platform_registry.register(discord_bot) if settings.telegram_token: try: @@ -392,8 +396,9 @@ async def lifespan(app: FastAPI): agent_id="timmy", ) - # Log swarm recovery summary + # Run swarm recovery and log summary from swarm.coordinator import coordinator as swarm_coordinator + swarm_coordinator.initialize() rec = swarm_coordinator._recovery_summary if rec["tasks_failed"] or rec["agents_offlined"]: logger.info( @@ -442,18 +447,14 @@ async def lifespan(app: FastAPI): # Start chat integrations in background chat_task = asyncio.create_task(_start_chat_integrations_background()) - # Register Discord bot - from integrations.chat_bridge.registry import platform_registry - from integrations.chat_bridge.vendors.discord import discord_bot - platform_registry.register(discord_bot) - logger.info("✓ Timmy Time dashboard ready for requests") yield # Cleanup on shutdown from integrations.telegram_bot.bot import telegram_bot - + from integrations.chat_bridge.vendors.discord import discord_bot + await discord_bot.stop() await telegram_bot.stop() diff --git a/src/dashboard/routes/health.py b/src/dashboard/routes/health.py index 686104c..7e8a140 100644 --- a/src/dashboard/routes/health.py +++ b/src/dashboard/routes/health.py @@ -4,6 +4,7 @@ Provides system health checks and sovereignty audit information for the Mission Control dashboard. """ +import asyncio import logging import os from datetime import datetime, timezone @@ -23,8 +24,8 @@ router = APIRouter(tags=["health"]) # Legacy health check for backward compatibility -async def check_ollama() -> bool: - """Legacy helper to check Ollama status.""" +def _check_ollama_sync() -> bool: + """Synchronous Ollama check — run via asyncio.to_thread().""" try: import urllib.request url = settings.ollama_url.replace("localhost", "127.0.0.1") @@ -39,6 +40,14 @@ async def check_ollama() -> bool: return False +async def check_ollama() -> bool: + """Check Ollama status without blocking the event loop.""" + try: + return await asyncio.to_thread(_check_ollama_sync) + except Exception: + return False + + class DependencyStatus(BaseModel): """Status of a single dependency.""" name: str @@ -67,8 +76,8 @@ class HealthStatus(BaseModel): _START_TIME = datetime.now(timezone.utc) -def _check_ollama() -> DependencyStatus: - """Check Ollama AI backend status.""" +def _check_ollama_status_sync() -> DependencyStatus: + """Synchronous Ollama status check — run via asyncio.to_thread().""" try: import urllib.request url = settings.ollama_url.replace("localhost", "127.0.0.1") @@ -90,7 +99,7 @@ def _check_ollama() -> DependencyStatus: pass except Exception: pass - + return DependencyStatus( name="Ollama AI", status="unavailable", @@ -99,11 +108,24 @@ def _check_ollama() -> DependencyStatus: ) +async def _check_ollama() -> DependencyStatus: + """Check Ollama AI backend status without blocking the event loop.""" + try: + return await asyncio.to_thread(_check_ollama_status_sync) + except Exception: + return DependencyStatus( + name="Ollama AI", + status="unavailable", + sovereignty_score=10, + details={"url": settings.ollama_url, "error": "Cannot connect to Ollama"}, + ) + + def _check_redis() -> DependencyStatus: """Check Redis cache status.""" try: - from swarm.comms import SwarmComms - comms = SwarmComms() + from swarm.coordinator import coordinator + comms = coordinator.comms # Check if we're using fallback if hasattr(comms, '_redis') and comms._redis is not None: return DependencyStatus( @@ -280,7 +302,7 @@ async def sovereignty_check(): Use this to verify the system is operating in a sovereign manner. """ dependencies = [ - _check_ollama(), + await _check_ollama(), _check_redis(), _check_lightning(), _check_sqlite(), diff --git a/src/swarm/comms.py b/src/swarm/comms.py index 7e07006..7a50d14 100644 --- a/src/swarm/comms.py +++ b/src/swarm/comms.py @@ -56,7 +56,11 @@ class SwarmComms: def _try_connect(self) -> None: try: import redis - self._redis = redis.from_url(self._redis_url) + self._redis = redis.from_url( + self._redis_url, + socket_connect_timeout=3, + socket_timeout=3, + ) self._redis.ping() self._pubsub = self._redis.pubsub() self._connected = True diff --git a/src/swarm/coordinator.py b/src/swarm/coordinator.py index 19bd3d2..d67ce46 100644 --- a/src/swarm/coordinator.py +++ b/src/swarm/coordinator.py @@ -53,6 +53,10 @@ class SwarmCoordinator: self.auctions = AuctionManager() self.comms = SwarmComms() self._in_process_nodes: list = [] + self._recovery_summary = {"tasks_failed": 0, "agents_offlined": 0} + + def initialize(self) -> None: + """Run startup recovery. Call during app lifespan, not at import time.""" self._recovery_summary = reconcile_on_startup() # ── Agent lifecycle ───────────────────────────────────────────────────── diff --git a/tests/self_coding/test_git_tools.py b/tests/self_coding/test_git_tools.py index cbb28e7..de74231 100644 --- a/tests/self_coding/test_git_tools.py +++ b/tests/self_coding/test_git_tools.py @@ -28,11 +28,12 @@ def git_repo(tmp_path): result = git_init(tmp_path) assert result["success"] - # Configure git identity for commits + # Configure git identity and disable signing for commits from git import Repo repo = Repo(str(tmp_path)) repo.config_writer().set_value("user", "name", "Test").release() repo.config_writer().set_value("user", "email", "test@test.com").release() + repo.config_writer().set_value("commit", "gpgsign", "false").release() # Create initial commit readme = tmp_path / "README.md" diff --git a/tests/swarm/test_swarm_recovery.py b/tests/swarm/test_swarm_recovery.py index 4076bee..9f1d3ae 100644 --- a/tests/swarm/test_swarm_recovery.py +++ b/tests/swarm/test_swarm_recovery.py @@ -156,9 +156,10 @@ def test_reconcile_counts_multiple_stale_agents(): # ── Coordinator integration ─────────────────────────────────────────────────── def test_coordinator_runs_recovery_on_init(): - """Coordinator.__init__ calls reconcile; _recovery_summary must be present.""" + """Coordinator.initialize() populates _recovery_summary.""" from swarm.coordinator import SwarmCoordinator coord = SwarmCoordinator() + coord.initialize() assert hasattr(coord, "_recovery_summary") assert "tasks_failed" in coord._recovery_summary assert "agents_offlined" in coord._recovery_summary @@ -166,7 +167,7 @@ def test_coordinator_runs_recovery_on_init(): def test_coordinator_recovery_cleans_stale_task(): - """End-to-end: task left in BIDDING is cleaned up by a fresh coordinator.""" + """End-to-end: task left in BIDDING is cleaned up after initialize().""" from swarm.tasks import create_task, get_task, update_task, TaskStatus from swarm.coordinator import SwarmCoordinator @@ -174,6 +175,7 @@ def test_coordinator_recovery_cleans_stale_task(): update_task(task.id, status=TaskStatus.BIDDING) coord = SwarmCoordinator() + coord.initialize() assert get_task(task.id).status == TaskStatus.FAILED assert coord._recovery_summary["tasks_failed"] >= 1 coord.manager.stop_all()