1
0
This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/swarm/docker_runner.py
Alexander Payne 4020b5222f feat: add Docker-based swarm agent containerization
Add infrastructure for running swarm agents as isolated Docker
containers with HTTP-based coordination, startup recovery, and
enhanced dashboard UI for agent management.

- Dockerfile and docker-compose.yml for multi-service orchestration
- DockerAgentRunner for programmatic container lifecycle management
- Internal HTTP API for container agents to poll tasks and submit bids
- Startup recovery system to reconcile orphaned tasks and stale agents
- Enhanced UI partials for agent panels, chat, and task assignment
- Timmy docker entry point with heartbeat and task polling
- New Makefile targets for Docker workflows
- Tests for swarm recovery

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-22 16:21:32 -05:00

188 lines
6.1 KiB
Python

"""Docker-backed agent runner — spawn swarm agents as isolated containers.

Drop-in complement to SwarmManager.  Instead of Python subprocesses,
DockerAgentRunner launches each agent as a Docker container that shares
the data volume and communicates with the coordinator over HTTP.

Requirements
------------
- Docker Engine running on the host (``docker`` CLI in PATH)
- The ``timmy-time:latest`` image already built (``make docker-build``)
- ``data/`` directory exists and is mounted at ``/app/data`` in each container

Communication
-------------
Container agents use the coordinator's internal HTTP API rather than the
in-memory SwarmComms channel::

    GET  /internal/tasks    → poll for tasks open for bidding
    POST /internal/bids     → submit a bid

The ``COORDINATOR_URL`` env var tells agents where to reach the coordinator.
Inside the docker-compose network this is ``http://dashboard:8000``.
From the host it is typically ``http://localhost:8000``.

Usage
-----
::

    from swarm.docker_runner import DockerAgentRunner

    runner = DockerAgentRunner()
    info = runner.spawn("Echo", capabilities="summarise,translate")
    print(info)  # {"container_id": "...", "name": "Echo", "agent_id": "..."}

    runner.stop(info["container_id"])
    runner.stop_all()
"""
import logging
import subprocess
import uuid
from dataclasses import dataclass, field
from typing import Optional
# Module-level logger, named after this module so log records can be
# filtered per component.
logger = logging.getLogger(__name__)
# Image used for agent containers when the caller does not supply one.
DEFAULT_IMAGE = "timmy-time:latest"
# Default coordinator address passed to agents via the COORDINATOR_URL
# environment variable.
DEFAULT_COORDINATOR_URL = "http://dashboard:8000"
@dataclass
class ManagedContainer:
    """Record describing one agent container tracked by a runner."""

    # ID printed by ``docker run --detach``; used as the registry key.
    container_id: str
    # Identifier of the swarm agent running inside the container.
    agent_id: str
    # Human-readable agent name.
    name: str
    # Docker image the container was started from.
    image: str
    # Capability string forwarded to the agent; empty when none was given.
    capabilities: str = ""
class DockerAgentRunner:
    """Spawn and manage swarm agents as Docker containers.

    Containers are started through the ``docker`` CLI and tracked in an
    in-memory registry keyed by container ID.  Only containers spawned
    through this instance are tracked.
    """

    def __init__(
        self,
        image: str = DEFAULT_IMAGE,
        coordinator_url: str = DEFAULT_COORDINATOR_URL,
        extra_env: Optional[dict] = None,
    ) -> None:
        """Configure the runner.

        Args:
            image: Default image for spawned agents.
            coordinator_url: Value exported to agents as ``COORDINATOR_URL``.
            extra_env: Additional environment variables for every agent.
                Entries override the built-in variables of the same name.
        """
        self.image = image
        self.coordinator_url = coordinator_url
        self.extra_env = extra_env or {}
        self._containers: dict[str, ManagedContainer] = {}

    # ── Public API ────────────────────────────────────────────────────────────

    def spawn(
        self,
        name: str,
        agent_id: Optional[str] = None,
        capabilities: str = "",
        image: Optional[str] = None,
    ) -> dict:
        """Spawn a new agent container and return its info dict.

        The container runs ``python -m swarm.agent_runner`` and communicates
        with the coordinator over HTTP via ``COORDINATOR_URL``.

        Args:
            name: Agent name, passed to the agent via ``--name`` and
                ``AGENT_NAME``.
            agent_id: Agent identifier; a random UUID4 is generated when
                omitted or empty.
            capabilities: Capability string exported as
                ``AGENT_CAPABILITIES``.
            image: Image override for this agent; falls back to the
                runner's default image.

        Returns:
            A dict with the keys ``container_id``, ``agent_id``, ``name``,
            ``image`` and ``capabilities``.

        Raises:
            RuntimeError: If the ``docker`` executable cannot be found or
                ``docker run`` exits with a non-zero status (the message is
                Docker's stderr output).
            subprocess.TimeoutExpired: If ``docker run`` does not finish
                within 15 seconds.
        """
        aid = agent_id or str(uuid.uuid4())
        img = image or self.image
        container_name = f"timmy-agent-{aid[:8]}"

        cmd = [
            "docker", "run",
            "--detach",
            "--name", container_name,
            "--network", "timmy-time_swarm-net",
            "--volume", "timmy-time_timmy-data:/app/data",
            # The CLI flag is ``--add-host``; ``extra_hosts`` is only the
            # Compose-file key and is rejected by ``docker run``.
            "--add-host", "host.docker.internal:host-gateway",
            *self._build_env_flags(aid, name, capabilities),
            img,
            "python", "-m", "swarm.agent_runner",
            "--agent-id", aid,
            "--name", name,
        ]

        try:
            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=15
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
                "Docker CLI not found. Is Docker Desktop running?"
            ) from exc
        if result.returncode != 0:
            raise RuntimeError(result.stderr.strip())

        # ``docker run --detach`` prints the new container's ID on stdout.
        container_id = result.stdout.strip()
        self._containers[container_id] = ManagedContainer(
            container_id=container_id,
            agent_id=aid,
            name=name,
            image=img,
            capabilities=capabilities,
        )
        logger.info(
            "Docker agent %s (%s) started — container %s",
            name, aid, container_id[:12],
        )
        return {
            "container_id": container_id,
            "agent_id": aid,
            "name": name,
            "image": img,
            "capabilities": capabilities,
        }

    def stop(self, container_id: str) -> bool:
        """Stop and remove a container agent.

        Runs ``docker rm -f`` on the container.  The container is dropped
        from the registry only when the command succeeds, so a failed
        removal can be retried later.

        Args:
            container_id: ID (or name) of the container to remove.

        Returns:
            True if ``docker rm -f`` exited successfully; False if the
            command could not be run, timed out, or exited non-zero.
        """
        try:
            result = subprocess.run(
                ["docker", "rm", "-f", container_id],
                capture_output=True, text=True, timeout=10,
            )
        except (OSError, ValueError, subprocess.SubprocessError) as exc:
            logger.error("Failed to stop container %s: %s", container_id[:12], exc)
            return False

        if result.returncode != 0:
            # A non-zero exit means Docker did not remove the container;
            # report the failure instead of claiming success.
            logger.error(
                "Failed to stop container %s: %s",
                container_id[:12], result.stderr.strip(),
            )
            return False

        self._containers.pop(container_id, None)
        logger.info("Docker agent container %s stopped", container_id[:12])
        return True

    def stop_all(self) -> int:
        """Stop all containers managed by this runner.

        Returns:
            The number of containers that were stopped successfully.
        """
        # Snapshot the keys first: stop() mutates the registry.
        tracked_ids = list(self._containers)
        return sum(1 for cid in tracked_ids if self.stop(cid))

    def list_containers(self) -> list[ManagedContainer]:
        """Return a snapshot list of the containers tracked by this runner."""
        return list(self._containers.values())

    def is_running(self, container_id: str) -> bool:
        """Return True if the container is currently running.

        Queries ``docker inspect`` for the container's running state.  Any
        failure to run the command (missing CLI, timeout, unknown
        container) is reported as "not running".
        """
        try:
            result = subprocess.run(
                ["docker", "inspect", "--format", "{{.State.Running}}", container_id],
                capture_output=True, text=True, timeout=5,
            )
        except (OSError, ValueError, subprocess.SubprocessError):
            return False
        return result.stdout.strip() == "true"

    # ── Internal ──────────────────────────────────────────────────────────────

    def _build_env_flags(self, agent_id: str, name: str, capabilities: str) -> list[str]:
        """Build the ``--env KEY=VALUE`` arguments for ``docker run``.

        The runner's ``extra_env`` entries are merged last, so they override
        the built-in variables of the same name.
        """
        env = {
            "COORDINATOR_URL": self.coordinator_url,
            "AGENT_NAME": name,
            "AGENT_ID": agent_id,
            "AGENT_CAPABILITIES": capabilities,
            **self.extra_env,
        }
        flags: list[str] = []
        for key, value in env.items():
            flags.extend(("--env", f"{key}={value}"))
        return flags