Consolidate test & dev workflows into tox as single source of truth (#160)

* Centralize all Python environments on tox tox.ini is now the single source of truth for how every Python environment runs — tests, linting, formatting, dev server, and CI. No more bare `poetry run` outside of tox. - Expand tox.ini from 4 to 15 environments (lint, format, typecheck, unit, integration, functional, e2e, fast, ollama, ci, coverage, coverage-html, pre-commit, dev, all) - Rewire all Makefile test/lint/format/dev targets to delegate to tox - Update .githooks/pre-commit to run `tox -e pre-commit` - Update .pre-commit-config.yaml to use tox instead of poetry run - Update CI workflow (lint + test jobs) to use `tox -e lint` and `tox -e ci` instead of ad-hoc pytest/black/isort invocations - Update CLAUDE.md to mandate tox usage and document all environments https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5 * refactor: modernize tox.ini for tox 4.x conventions - Replace `skipsdist = true` (tox 3 alias) with `no_package = true` - Use `poetry install --no-root --sync` for faster, cleaner dep installs https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5 * fix(ci): drop poetry install from lint/format tox envs Lint and format only need black, isort, and bandit — not the full project dependency tree. Override commands_pre to empty and use tox deps instead. Fixes CI failure where poetry is not on PATH. https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5 * fix(ci): remove poetry run wrapper from all tox commands Since commands_pre runs poetry install into the tox-managed venv, all tools (pytest, mypy, black, etc.) are already on the venv PATH. The poetry run wrapper is redundant and fails in CI where poetry may not be installed globally. 
https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5 * fix(ci): remove poetry dependency, align local and CI processes - Replace `poetry install` with `pip install -e ".[dev]"` in tox commands_pre so all envs work without poetry installed - Remove Poetry cache from GitHub Actions (only pip cache needed) - Rename pre-commit env to pre-push: runs lint + full CI suite (same checks as GitHub Actions, reports generated locally) - Update CLAUDE.md to reflect new pre-push workflow The local `tox -e pre-push` now runs the exact same lint + test + coverage checks as CI, so failures are caught before pushing. https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5 --------- Co-authored-by: Claude <noreply@anthropic.com>
2026-03-10 15:54:09 -04:00
parent 2a5f317a12
commit 6303a77f6e
6 changed files with 227 additions and 97 deletions
--- a/.githooks/pre-commit
+++ b/.githooks/pre-commit
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Pre-commit hook: lint + test with a wall-clock limit.
+# Pre-commit hook: format + test via tox.
 # Blocks the commit if formatting, imports, or tests fail.
 # Current baseline: ~18s wall-clock. Limit set to 30s for headroom.
 #
@@ -7,30 +7,29 @@

 set -e

-echo "Auto-formatting (black + isort)..."
-poetry run black --line-length 100 src/ tests/ --quiet
-poetry run isort --profile black --line-length 100 src/ tests/ --quiet 2>/dev/null
-git add -u
-
 MAX_SECONDS=30

-echo "Running tests (${MAX_SECONDS}s limit)..."
+echo "Running pre-commit gate via tox (${MAX_SECONDS}s limit)..."

 # macOS lacks GNU timeout; use perl as a portable fallback.
+# "|| exit_code=$?" keeps "set -e" from aborting before the BLOCKED messages below.
+exit_code=0
 if command -v timeout &>/dev/null; then
-    timeout "${MAX_SECONDS}" poetry run pytest tests -q --tb=short --timeout=10
+    timeout "${MAX_SECONDS}" tox -e format,unit || exit_code=$?
 else
-    perl -e "alarm ${MAX_SECONDS}; exec @ARGV" -- poetry run pytest tests -q --tb=short --timeout=10
+    perl -e "alarm ${MAX_SECONDS}; exec @ARGV" -- tox -e format,unit || exit_code=$?
 fi
-exit_code=$?

+# Re-stage any files that black/isort reformatted
+git add -u
+
 if [ "$exit_code" -eq 142 ] || [ "$exit_code" -eq 124 ]; then
    echo ""
-    echo "BLOCKED: tests exceeded ${MAX_SECONDS}s wall-clock limit."
+    echo "BLOCKED: pre-commit gate exceeded ${MAX_SECONDS}s wall-clock limit."
    echo "Speed up slow tests before committing."
    exit 1
 elif [ "$exit_code" -ne 0 ]; then
    echo ""
-    echo "BLOCKED: tests failed."
+    echo "BLOCKED: pre-commit gate failed."
    exit 1
 fi
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,17 +16,11 @@ jobs:
        with:
          python-version: "3.11"

-      - name: Install linters
-        run: pip install black==26.3.0 isort==5.13.2 bandit==1.8.0
+      - name: Install tox
+        run: pip install tox

-      - name: Check formatting (black)
-        run: black --check --line-length 100 src/ tests/
-
-      - name: Check import order (isort)
-        run: isort --check --profile black --line-length 100 src/ tests/
-
-      - name: Security scan (bandit)
-        run: bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+      - name: Lint (black + isort + bandit via tox)
+        run: tox -e lint

  test:
    runs-on: ubuntu-latest
@@ -45,31 +39,18 @@ jobs:
        with:
          python-version: "3.11"

-      - name: Cache Poetry virtualenv
+      - name: Cache pip
        uses: actions/cache@v4
        with:
-          path: |
-            ~/.cache/pypoetry
-            ~/.cache/pip
-          key: poetry-${{ hashFiles('poetry.lock') }}
-          restore-keys: poetry-
+          path: ~/.cache/pip
+          key: pip-${{ hashFiles('poetry.lock') }}
+          restore-keys: pip-

-      - name: Install dependencies
-        run: |
-          pip install poetry
-          poetry install --with dev
+      - name: Install tox
+        run: pip install tox

-      - name: Run tests
-        run: |
-          mkdir -p reports
-          poetry run pytest \
-            --cov=src \
-            --cov-report=term-missing \
-            --cov-report=xml:reports/coverage.xml \
-            --cov-fail-under=73 \
-            --junitxml=reports/junit.xml \
-            -p no:xdist \
-            -m "not ollama and not docker and not selenium and not external_api"
+      - name: Run tests (via tox)
+        run: tox -e ci

      # Posts a check annotation + PR comment showing pass/fail counts.
      # Visible in the GitHub mobile app under Checks and in PR conversations.
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -51,25 +51,16 @@ repos:
        exclude: ^tests/
        stages: [manual]

-  # Unit tests only with 30-second wall-clock limit.
-  # Runs only fast unit tests on commit; full suite runs in CI.
+  # Format + unit tests via tox (no wall-clock limit is enforced by this hook).
+  # Runs the tox format and unit envs; full suite runs in CI.
  - repo: local
    hooks:
-      - id: pytest-fast
-        name: pytest unit (30s limit)
-        entry: timeout 30 poetry run pytest
+      - id: tox-pre-commit
+        name: tox pre-commit (format + unit tests)
+        entry: tox -e format,unit
        language: system
        types: [python]
        stages: [pre-commit]
        pass_filenames: false
        always_run: true
-        args:
-          - tests
-          - -q
-          - --tb=short
-          - --timeout=10
-          - -p
-          - no:xdist
-          - --ignore=tests/e2e
-          - --ignore=tests/functional
        verbose: true
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -44,13 +44,45 @@ New routes: `src/dashboard/routes/<name>.py` → register in `src/dashboard/app.

 ---

-## Testing
+## Development Environments (tox)
+
+**tox is the single source of truth for all Python environments.**
+Never run `poetry run` directly — always go through tox. All environment
+config (deps, markers, flags) lives in `tox.ini`. The Makefile and CI
+both delegate to tox.
+
+### Quick reference

 ```bash
-make test               # Quick run (no Ollama needed)
-make test-cov           # With coverage (term-missing + XML)
+tox -e unit             # Fast unit tests (default pre-commit gate)
+tox -e ci               # Full CI suite with coverage + JUnit XML
+tox -e lint             # black --check + isort --check + bandit
+tox -e format           # Auto-format (black + isort)
+tox -e dev              # Start dashboard with auto-reload
 ```

+### All tox environments
+
+| Environment | Purpose |
+|---|---|
+| `lint` | Check formatting + imports + security |
+| `format` | Auto-format code |
+| `typecheck` | mypy static analysis |
+| `unit` | Fast unit tests, parallel |
+| `integration` | Integration tests, parallel |
+| `functional` | Functional tests, sequential |
+| `e2e` | End-to-end tests |
+| `fast` | unit + integration combined |
+| `ollama` | Live LLM tests (requires Ollama) |
+| `ci` | Coverage + JUnit XML (mirrors GitHub Actions) |
+| `coverage` | Coverage terminal + XML |
+| `coverage-html` | Coverage HTML report |
+| `pre-push` | Lint + full CI suite (mirrors GitHub Actions exactly) |
+| `dev` | uvicorn with auto-reload |
+| `all` | All tests, parallel |
+
+### Testing notes
+
 - **Stubs in conftest:** `agno`, `airllm`, `pyttsx3`, `telegram`, `discord`
  stubbed via `sys.modules.setdefault()` — tests run without those packages
 - **Test mode:** `TIMMY_TEST_MODE=1` set automatically in conftest
@@ -62,7 +94,7 @@ make test-cov           # With coverage (term-missing + XML)

 ## Key Conventions

-1. **Tests must stay green.** Run `make test` before committing.
+1. **Tests must stay green.** Run `tox -e unit` before committing. Run `tox -e pre-push` before pushing (mirrors CI exactly).
 2. **No cloud AI dependencies.** All inference on localhost.
 3. **Keep the root directory clean.** No new top-level files without purpose.
 4. **Follow existing patterns** — singletons, graceful degradation, pydantic config.
@@ -71,6 +103,7 @@ make test-cov           # With coverage (term-missing + XML)
 7. **Keep routes thin** — business logic lives in the module, not the route.
 8. **Prefer editing existing files** over creating new ones.
 9. **Use `from config import settings`** for all env-var access.
+10. **Use tox for everything.** Never run `poetry run` directly — use `tox -e <env>`.

 ---

--- a/Makefile
+++ b/Makefile
@@ -5,10 +5,7 @@
        cloud-deploy cloud-up cloud-down cloud-logs cloud-status cloud-update \
        logs-up logs-down logs-kibana

-PYTEST      := poetry run pytest
-UVICORN     := poetry run uvicorn
-SELF_TDD    := poetry run self-tdd
-PYTHON      := poetry run python
+TOX         := tox

 # ── Setup ─────────────────────────────────────────────────────────────────────

@@ -33,7 +30,7 @@ install-bigbrain:
 # ── Development ───────────────────────────────────────────────────────────────

 dev: nuke
-	PYTHONDONTWRITEBYTECODE=1 $(UVICORN) dashboard.app:app --reload --host 0.0.0.0 --port 8000
+	PYTHONDONTWRITEBYTECODE=1 $(TOX) -e dev

 # Kill anything on port 8000, stop Docker containers, clear stale state.
 # Safe to run anytime — idempotent, never errors out.
@@ -83,43 +80,40 @@ ip:
 	@echo ""

 watch:
-	$(SELF_TDD) watch --interval 60
+	poetry run self-tdd watch --interval 60

-# ── Testing ───────────────────────────────────────────────────────────────────
+# ── Testing (all via tox) ─────────────────────────────────────────────────────

 test:
-	$(PYTEST) tests/ -q --tb=short -n auto --dist worksteal
+	$(TOX) -e all

 test-unit:
-	$(PYTEST) tests -m "unit" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e unit

 test-integration:
-	$(PYTEST) tests -m "integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e integration

 test-functional:
-	$(PYTEST) tests -m "functional and not slow and not selenium" --tb=short -v -n0
+	$(TOX) -e functional

 test-e2e:
-	$(PYTEST) tests -m "e2e" --tb=short -v -n0
+	$(TOX) -e e2e

 test-fast:
-	$(PYTEST) tests -m "unit or integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e fast

 test-ci:
-	$(PYTEST) tests -m "not skip_ci" --tb=short --cov=src --cov-report=term-missing --cov-fail-under=73 -p no:xdist
+	$(TOX) -e ci

 test-cov:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=xml --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage

 test-cov-html:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=html --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage-html
 	@echo "✓ HTML coverage report: open htmlcov/index.html"

-# Full-stack functional test: spins up Ollama (CPU, qwen2.5:0.5b) + dashboard
-# in Docker and verifies real LLM chat end-to-end.
-# Override model: make test-ollama OLLAMA_TEST_MODEL=tinyllama
 test-ollama:
-	FUNCTIONAL_DOCKER=1 $(PYTEST) tests/functional/test_ollama_chat.py -v --tb=long -x
+	$(TOX) -e ollama

 # ── Docker test containers ───────────────────────────────────────────────────
 # Clean containers from cached images; source bind-mounted for fast iteration.
@@ -156,15 +150,13 @@ test-docker-down:
 # ── Code quality ──────────────────────────────────────────────────────────────

 lint:
-	$(PYTHON) -m black --check src tests --line-length=100
-	$(PYTHON) -m isort --check-only src tests --profile=black --line-length=100
+	$(TOX) -e lint

 format:
-	$(PYTHON) -m black src tests --line-length=100
-	$(PYTHON) -m isort src tests --profile=black --line-length=100
+	$(TOX) -e format

 type-check:
-	mypy src --ignore-missing-imports --no-error-summary
+	$(TOX) -e typecheck

 pre-commit-install:
 	pre-commit install
--- a/tox.ini
+++ b/tox.ini
@@ -1,23 +1,161 @@
 [tox]
-envlist = unit, integration
-skipsdist = true
+envlist = lint, unit, integration
+no_package = true

+# ── Base ─────────────────────────────────────────────────────────────────────
 [testenv]
-allowlist_externals = poetry
-commands_pre = poetry install --with dev --quiet
+allowlist_externals = timeout, perl, docker, mkdir
+commands_pre = pip install -e ".[dev]" --quiet
+
+setenv =
+    TIMMY_TEST_MODE = 1
+    TIMMY_DISABLE_CSRF = 1
+    TIMMY_SKIP_EMBEDDINGS = 1
+
+# ── Lint & Format ────────────────────────────────────────────────────────────
+
+[testenv:lint]
+description = Check formatting (black), import order (isort), security (bandit)
+commands_pre =
+deps =
+    black
+    isort
+    bandit>=1.8.0
+commands =
+    black --check --line-length 100 src/ tests/
+    isort --check-only --profile black --line-length 100 src/ tests/
+    bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+
+[testenv:format]
+description = Auto-format code with black + isort
+commands_pre =
+deps =
+    black
+    isort
+commands =
+    black --line-length 100 src/ tests/
+    isort --profile black --line-length 100 src/ tests/
+
+[testenv:typecheck]
+description = Static type checking with mypy
+commands =
+    mypy src --ignore-missing-imports --no-error-summary
+
+# ── Test Environments ────────────────────────────────────────────────────────

 [testenv:unit]
-description = Fast unit tests (no I/O, no external services)
-commands = poetry run pytest tests/ -q --tb=short -m "unit and not integration and not ollama and not docker and not selenium and not external_api"
+description = Fast tests — excludes e2e, functional, and external services
+commands =
+    pytest tests/ -q --tb=short \
+        --ignore=tests/e2e \
+        --ignore=tests/functional \
+        -m "not ollama and not docker and not selenium and not external_api and not skip_ci" \
+        -n auto --dist worksteal

 [testenv:integration]
-description = Integration tests (may use SQLite, but no external services)
-commands = poetry run pytest tests/ -q --tb=short -m "integration and not unit and not ollama and not docker and not selenium and not external_api"
+description = Integration tests (marked with @pytest.mark.integration)
+commands =
+    pytest tests/ -q --tb=short \
+        -m "integration and not ollama and not docker and not selenium and not external_api" \
+        -n auto --dist worksteal
+
+[testenv:functional]
+description = Functional tests — real HTTP, no mocking (excl slow + selenium)
+commands =
+    pytest tests/functional/ -q --tb=short -n0 \
+        -m "not slow and not selenium"
+
+[testenv:e2e]
+description = End-to-end tests — full system, may be slow
+commands =
+    pytest tests/e2e/ -q --tb=short -n0
+
+[testenv:fast]
+description = All tests except e2e, functional, and external
+commands =
+    pytest tests/ -q --tb=short \
+        --ignore=tests/e2e \
+        --ignore=tests/functional \
+        -m "not ollama and not docker and not selenium and not external_api" \
+        -n auto --dist worksteal

 [testenv:ollama]
-description = Live LLM tests via Ollama (requires Ollama running with a tiny model)
-commands = poetry run pytest tests/ -q --tb=short -m ollama --timeout=120
+description = Live LLM tests via Ollama (requires running Ollama)
+commands =
+    pytest tests/ -q --tb=short -m ollama --timeout=120
+
+# ── CI / Coverage ────────────────────────────────────────────────────────────
+
+[testenv:ci]
+description = CI test suite with coverage + JUnit XML (mirrors GitHub Actions)
+commands =
+    mkdir -p reports
+    pytest tests/ \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml:reports/coverage.xml \
+        --cov-fail-under=73 \
+        --junitxml=reports/junit.xml \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"

 [testenv:coverage]
-description = Run all tests with coverage reporting
-commands = poetry run pytest tests/ -q --tb=short --cov=src --cov-report=term-missing --cov-fail-under=73 -p no:xdist -m "not ollama and not docker and not selenium and not external_api"
+description = Full coverage report (terminal + XML)
+commands =
+    pytest tests/ -q --tb=short \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml \
+        --cov-fail-under=73 \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+[testenv:coverage-html]
+description = Coverage with HTML report
+commands =
+    pytest tests/ -q --tb=short \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=html \
+        --cov-fail-under=73 \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+# ── Pre-push (mirrors CI exactly) ────────────────────────────────────────────
+
+[testenv:pre-push]
+description = Local gate — lint + full CI suite (same as GitHub Actions)
+deps =
+    black
+    isort
+    bandit>=1.8.0
+commands =
+    black --check --line-length 100 src/ tests/
+    isort --check-only --profile black --line-length 100 src/ tests/
+    bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+    mkdir -p reports
+    pytest tests/ \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml:reports/coverage.xml \
+        --cov-fail-under=73 \
+        --junitxml=reports/junit.xml \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+# ── Dev Server ───────────────────────────────────────────────────────────────
+
+[testenv:dev]
+description = Start dashboard with auto-reload (local development)
+# Replace the base setenv so the TIMMY_* test flags from [testenv] do not reach the dev server.
+setenv =
+    PYTHONDONTWRITEBYTECODE = 1
+commands =
+    uvicorn dashboard.app:app --reload --host 0.0.0.0 --port 8000
+
+# ── All Tests (parallel) ─────────────────────────────────────────────────────
+
+[testenv:all]
+description = Run all tests in parallel
+commands =
+    pytest tests/ -q --tb=short -n auto --dist worksteal