Hermes fleet deployment patch: installs Docker from the official apt
repository via Ansible modules, adds ollama/gitea/monitor services to the
Compose project, and pins the agent image build to a commit and uv version.
Text before the first "diff --git" line is ignored by git apply / patch.
NOTE(review): leading indentation was reconstructed from a whitespace-collapsed
copy of this patch; confirm context lines against the target files.

diff --git a/deploy-containers.yml b/deploy-containers.yml
index bb6403a3..42c12111 100644
--- a/deploy-containers.yml
+++ b/deploy-containers.yml
@@ -3,13 +3,35 @@
   hosts: wizards
   become: yes
   tasks:
-    - name: Install Docker
-      shell: curl -fsSL https://get.docker.com | sh
-      args:
-        creates: /usr/bin/docker
+    - name: Install Docker prerequisites
+      apt:
+        name:
+          - apt-transport-https
+          - ca-certificates
+          - curl
+          - gnupg
+          - lsb-release
+        state: present
+        update_cache: yes
 
-    - name: Install Docker Compose
-      shell: apt-get update && apt-get install -y docker-compose-plugin
+    - name: Add Docker GPG key
+      apt_key:
+        url: https://download.docker.com/linux/ubuntu/gpg
+        state: present
+
+    - name: Add Docker repository
+      apt_repository:
+        repo: "deb [arch=amd64] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable"  # Jinja2 variable: no "$" prefix
+        state: present
+
+    - name: Install Docker Engine
+      apt:
+        name:
+          - docker-ce
+          - docker-ce-cli
+          - containerd.io
+          - docker-compose-plugin
+        state: present
 
     - name: Create Docker Compose directory
       file:
@@ -26,4 +48,7 @@
         - docker/
 
     - name: Start the Fleet
-      shell: "cd /opt/hermes-fleet && docker compose up -d --build"
+      community.docker.docker_compose_v2:
+        project_src: /opt/hermes-fleet
+        state: present
+        build: always
diff --git a/docker-compose.yml b/docker-compose.yml
index aa96ede5..635866c3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,12 +1,73 @@
 version: "3.8"
 
+x-wizard-base: &wizard-base
+  build:
+    context: .
+    dockerfile: docker/agent.Dockerfile
+  restart: unless-stopped
+  networks:
+    - fleet
+  healthcheck:
+    test: ["CMD", "curl", "-f", "http://localhost:8643/health"]
+    interval: 30s
+    timeout: 10s
+    retries: 5
+    start_period: 15s
+
 services:
+  ollama:
+    image: ollama/ollama:latest
+    container_name: ollama
+    restart: unless-stopped
+    ports:
+      - "${OLLAMA_PORT:-11434}:11434"
+    volumes:
+      - ollama_data:/root/.ollama
+    networks:
+      - fleet
+    healthcheck:
+      test: ["CMD", "ollama", "list"]  # bundled CLI; NOTE(review): image appears to ship no curl - confirm
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 60s
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  gitea:
+    image: gitea/gitea:latest
+    container_name: gitea
+    restart: unless-stopped
+    environment:
+      - USER_UID=${GITEA_UID:-1000}
+      - USER_GID=${GITEA_GID:-1000}
+      - GITEA__server__ROOT_URL=${GITEA_ROOT_URL:-http://gitea:3000}
+      - GITEA__server__DOMAIN=${GITEA_DOMAIN:-gitea}
+      - GITEA__server__SSH_PORT=${GITEA_SSH_PORT:-2222}
+      - GITEA__database__DB_TYPE=sqlite3
+    ports:
+      - "${GITEA_WEB_PORT:-3000}:3000"
+      - "${GITEA_SSH_PORT:-2222}:22"
+    volumes:
+      - gitea_data:/data
+      - /etc/timezone:/etc/timezone:ro
+      - /etc/localtime:/etc/localtime:ro
+    networks:
+      - fleet
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:3000/api/v1/version"]
+      interval: 30s
+      timeout: 10s
+      retries: 5
+      start_period: 30s
+
   ezra:
-    build:
-      context: .
-      dockerfile: docker/agent.Dockerfile
-      args:
-        AGENT_REPO: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git"
+    <<: *wizard-base
     container_name: wizard-ezra
     ports:
       - "8643:8643"
@@ -15,14 +76,12 @@ services:
       - /root/wizards/ezra/models:/app/models
     environment:
       - WIZARD_NAME=ezra
-    restart: always
+    depends_on:
+      ollama: { condition: service_healthy }
+      gitea: { condition: service_healthy }
 
   bezalel:
-    build:
-      context: .
-      dockerfile: docker/agent.Dockerfile
-      args:
-        AGENT_REPO: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git"
+    <<: *wizard-base
     container_name: wizard-bezalel
     ports:
       - "8644:8643"
@@ -31,14 +90,12 @@ services:
       - /root/wizards/bezalel/models:/app/models
     environment:
       - WIZARD_NAME=bezalel
-    restart: always
+    depends_on:
+      ollama: { condition: service_healthy }
+      gitea: { condition: service_healthy }
 
   allegro:
-    build:
-      context: .
-      dockerfile: docker/agent.Dockerfile
-      args:
-        AGENT_REPO: "https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git"
+    <<: *wizard-base
     container_name: wizard-allegro
     ports:
       - "8645:8643"
@@ -47,4 +104,44 @@ services:
       - /root/wizards/allegro/models:/app/models
     environment:
       - WIZARD_NAME=allegro
-    restart: always
+    depends_on:
+      ollama: { condition: service_healthy }
+      gitea: { condition: service_healthy }
+
+  monitor:
+    image: curlimages/curl:latest
+    container_name: monitor
+    restart: unless-stopped
+    entrypoint: /bin/sh
+    command:
+      - -c
+      - |  # "$$" below is Compose's escape for a literal "$" handed to the shell
+        while true; do
+          echo "[monitor] Checking services..."
+          for svc in ollama:11434/api/tags gitea:3000/api/v1/version ezra:8643/health bezalel:8643/health allegro:8643/health; do
+            host=$$(echo "$$svc" | cut -d: -f1)
+            url="http://$$svc"
+            if curl -sf "$$url" > /dev/null 2>&1; then
+              echo "[monitor] $$host: OK"
+            else
+              echo "[monitor] $$host: DOWN"
+            fi
+          done
+          sleep ${MONITOR_INTERVAL:-60}
+        done
+    networks:
+      - fleet
+    depends_on:
+      - ollama
+      - gitea
+
+volumes:
+  ollama_data:
+    driver: local
+  gitea_data:
+    driver: local
+
+networks:
+  fleet:
+    driver: bridge
+    name: fleet-net
diff --git a/docker/agent.Dockerfile b/docker/agent.Dockerfile
index b35fea66..f116a942 100644
--- a/docker/agent.Dockerfile
+++ b/docker/agent.Dockerfile
@@ -1,17 +1,20 @@
 # --- Build Stage ---
 FROM python:3.11-slim AS builder
 
+# Pin versions for reproducibility
 ARG AGENT_REPO="https://forge.alexanderwhitestone.com/Timmy_Foundation/hermes-agent.git"
+ARG AGENT_COMMIT="a89dae9942997b55d1b66330ff62de24ab3d6170"
+ARG UV_VERSION="0.5.1"
 
-RUN apt-get update && apt-get install -y --no-install-recommends curl git build-essential cmake ca-certificates && curl -LsSf https://astral.sh/uv/install.sh | sh && rm -rf /var/lib/apt/lists/*
+RUN apt-get update && apt-get install -y --no-install-recommends curl git build-essential cmake ca-certificates && curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && rm -rf /var/lib/apt/lists/*
 
 ENV PATH="/root/.local/bin:${PATH}"
 WORKDIR /app
 
-# Clone the actual source code
-RUN git clone ${AGENT_REPO} .
+# Pin the clone to a specific commit for deterministic builds
+RUN git clone ${AGENT_REPO} . && git checkout ${AGENT_COMMIT}
 
-# Install dependencies using uv
+# Install dependencies and project into venv
 RUN uv sync --frozen --no-dev
 
 # --- Final Stage ---
@@ -19,11 +22,15 @@ FROM python:3.11-slim
 
 RUN apt-get update && apt-get install -y --no-install-recommends curl git socat ripgrep ffmpeg ca-certificates && rm -rf /var/lib/apt/lists/*
 
-# Copy the virtual environment and the source from builder
-COPY --from=builder /app/.venv /app/.venv
+# Copy the whole app tree from the builder (this includes /app/.venv)
 COPY --from=builder /app /app
 
 ENV PATH="/app/.venv/bin:${PATH}"
 WORKDIR /app
+
+# Healthcheck to verify the gateway is responding
+HEALTHCHECK --interval=30s --timeout=10s --start-period=15s --retries=3 \
+    CMD curl -f http://localhost:8643/health || exit 1
+
 EXPOSE 8643
 ENTRYPOINT ["hermes", "gateway"]