forked from Rockachopa/Timmy-time-dashboard
Add infrastructure for running swarm agents as isolated Docker containers with HTTP-based coordination, startup recovery, and enhanced dashboard UI for agent management. - Dockerfile and docker-compose.yml for multi-service orchestration - DockerAgentRunner for programmatic container lifecycle management - Internal HTTP API for container agents to poll tasks and submit bids - Startup recovery system to reconcile orphaned tasks and stale agents - Enhanced UI partials for agent panels, chat, and task assignment - Timmy docker entry point with heartbeat and task polling - New Makefile targets for Docker workflows - Tests for swarm recovery Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
188 lines
6.1 KiB
Python
188 lines
6.1 KiB
Python
"""Docker-backed agent runner — spawn swarm agents as isolated containers.

Drop-in complement to SwarmManager.  Instead of Python subprocesses,
DockerAgentRunner launches each agent as a Docker container that shares
the data volume and communicates with the coordinator over HTTP.

Requirements
------------
- Docker Engine running on the host (``docker`` CLI in PATH)
- The ``timmy-time:latest`` image already built (``make docker-build``)
- ``data/`` directory exists and is mounted at ``/app/data`` in each container

Communication
-------------
Container agents use the coordinator's internal HTTP API rather than the
in-memory SwarmComms channel::

    GET  /internal/tasks    → poll for tasks open for bidding
    POST /internal/bids     → submit a bid

The ``COORDINATOR_URL`` env var tells agents where to reach the coordinator.
Inside the docker-compose network this is ``http://dashboard:8000``.
From the host it is typically ``http://localhost:8000``.

Usage
-----
::

    from swarm.docker_runner import DockerAgentRunner

    runner = DockerAgentRunner()
    info = runner.spawn("Echo", capabilities="summarise,translate")
    print(info)  # {"container_id": "...", "name": "Echo", "agent_id": "...", ...}

    runner.stop(info["container_id"])
    runner.stop_all()
"""
|
|
|
|
import logging
|
|
import subprocess
|
|
import uuid
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Image used for agent containers unless the caller supplies another one.
DEFAULT_IMAGE = "timmy-time:latest"
# Coordinator address handed to containers as COORDINATOR_URL by default.
DEFAULT_COORDINATOR_URL = "http://dashboard:8000"
|
|
|
|
|
|
@dataclass
class ManagedContainer:
    """Record describing one agent container started by the runner."""

    # Identifier printed by ``docker run --detach``; used as the tracking key.
    container_id: str
    # Swarm agent identifier passed to the agent process.
    agent_id: str
    # Human-readable agent name.
    name: str
    # Docker image the container was started from.
    image: str
    # Capabilities string forwarded to the agent; empty when none were given.
    capabilities: str = ""
|
|
|
|
|
|
class DockerAgentRunner:
    """Spawn and manage swarm agents as Docker containers.

    Containers are started through the ``docker`` CLI and tracked in memory
    by container ID, so only containers spawned through this instance are
    visible to :meth:`list_containers` and :meth:`stop_all`.

    Args:
        image: Image used when :meth:`spawn` is not given one explicitly.
        coordinator_url: Value passed to containers as ``COORDINATOR_URL``.
        extra_env: Extra environment variables for every container.  They are
            merged last, so they win over the built-in variables on a clash.
        network: Docker network each container is attached to.
        volume: ``source:target`` volume mapping mounted into each container.
    """

    def __init__(
        self,
        image: str = DEFAULT_IMAGE,
        coordinator_url: str = DEFAULT_COORDINATOR_URL,
        extra_env: Optional[dict] = None,
        network: str = "timmy-time_swarm-net",
        volume: str = "timmy-time_timmy-data:/app/data",
    ) -> None:
        self.image = image
        self.coordinator_url = coordinator_url
        self.extra_env = extra_env or {}
        self.network = network
        self.volume = volume
        # container_id -> record, for containers started by this instance.
        self._containers: dict[str, ManagedContainer] = {}

    # ── Public API ────────────────────────────────────────────────────────────

    def spawn(
        self,
        name: str,
        agent_id: Optional[str] = None,
        capabilities: str = "",
        image: Optional[str] = None,
    ) -> dict:
        """Spawn a new agent container and return its info dict.

        The container runs ``python -m swarm.agent_runner`` and communicates
        with the coordinator over HTTP via ``COORDINATOR_URL``.

        Args:
            name: Agent name, passed as ``--name`` to the agent process and
                as ``AGENT_NAME`` in the environment.
            agent_id: Agent identifier; a random UUID4 is generated when
                omitted or empty.
            capabilities: Capabilities string exported as
                ``AGENT_CAPABILITIES``.
            image: Image override; falls back to the runner's default image.

        Returns:
            A dict with the keys ``container_id``, ``agent_id``, ``name``,
            ``image`` and ``capabilities``.

        Raises:
            RuntimeError: If the ``docker`` executable cannot be found or
                ``docker run`` exits with a non-zero status (the message is
                docker's stderr).
            subprocess.TimeoutExpired: If ``docker run`` does not return
                within 15 seconds.
        """
        aid = agent_id or str(uuid.uuid4())
        img = image or self.image
        container_name = f"timmy-agent-{aid[:8]}"

        cmd = [
            "docker", "run",
            "--detach",
            "--name", container_name,
            "--network", self.network,
            "--volume", self.volume,
            # ``docker run`` spells this flag ``--add-host``; ``extra_hosts``
            # is the docker-compose key and is rejected by the CLI.
            "--add-host", "host.docker.internal:host-gateway",
            *self._build_env_flags(aid, name, capabilities),
            img,
            "python", "-m", "swarm.agent_runner",
            "--agent-id", aid,
            "--name", name,
        ]

        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=15
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
                "Docker CLI not found. Is Docker Desktop running?"
            ) from exc

        if result.returncode != 0:
            raise RuntimeError(result.stderr.strip())
        # ``docker run --detach`` prints the new container's ID on stdout.
        container_id = result.stdout.strip()

        self._containers[container_id] = ManagedContainer(
            container_id=container_id,
            agent_id=aid,
            name=name,
            image=img,
            capabilities=capabilities,
        )
        logger.info(
            "Docker agent %s (%s) started — container %s",
            name, aid, container_id[:12],
        )
        return {
            "container_id": container_id,
            "agent_id": aid,
            "name": name,
            "image": img,
            "capabilities": capabilities,
        }

    def stop(self, container_id: str) -> bool:
        """Stop and remove a container agent.

        Runs ``docker rm -f`` and drops the container from the tracking table.

        Returns:
            True when the container was removed or docker reports that it no
            longer exists; False when the docker command could not be run,
            timed out, or failed for any other reason.  On failure the
            container stays tracked so the call can be retried.
        """
        try:
            result = subprocess.run(
                ["docker", "rm", "-f", container_id],
                capture_output=True, text=True, timeout=10,
            )
        except (OSError, subprocess.SubprocessError) as exc:
            logger.error("Failed to stop container %s: %s", container_id[:12], exc)
            return False

        if result.returncode != 0:
            stderr = (result.stderr or "").strip()
            # A container that is already gone is not a failure: the stale
            # tracking entry is still dropped below.
            if "no such container" not in stderr.lower():
                logger.error(
                    "Failed to stop container %s: %s", container_id[:12], stderr
                )
                return False

        self._containers.pop(container_id, None)
        logger.info("Docker agent container %s stopped", container_id[:12])
        return True

    def stop_all(self) -> int:
        """Stop all containers managed by this runner.

        Returns:
            The number of containers for which :meth:`stop` reported success.
        """
        # Snapshot the keys first: stop() mutates the tracking dict.
        ids = list(self._containers)
        return sum(1 for cid in ids if self.stop(cid))

    def list_containers(self) -> list[ManagedContainer]:
        """Return the records of all containers currently tracked."""
        return list(self._containers.values())

    def is_running(self, container_id: str) -> bool:
        """Return True if the container is currently running.

        Any failure to run ``docker inspect`` (missing CLI, timeout) is
        reported as "not running".
        """
        try:
            result = subprocess.run(
                ["docker", "inspect", "--format", "{{.State.Running}}", container_id],
                capture_output=True, text=True, timeout=5,
            )
        except (OSError, subprocess.SubprocessError):
            return False
        return result.stdout.strip() == "true"

    # ── Internal ──────────────────────────────────────────────────────────────

    def _build_env_flags(self, agent_id: str, name: str, capabilities: str) -> list[str]:
        """Build the ``--env KEY=VALUE`` arguments for ``docker run``.

        The runner's ``extra_env`` entries are merged last and therefore
        override the built-in variables when keys collide.
        """
        env = {
            "COORDINATOR_URL": self.coordinator_url,
            "AGENT_NAME": name,
            "AGENT_ID": agent_id,
            "AGENT_CAPABILITIES": capabilities,
            **self.extra_env,
        }
        flags: list[str] = []
        for key, value in env.items():
            flags.extend(("--env", f"{key}={value}"))
        return flags
|