Consolidate test & dev workflows into tox as single source of truth (#160)

* Centralize all Python environments on tox

  tox.ini is now the single source of truth for how every Python environment runs — tests, linting, formatting, dev server, and CI. No more bare `poetry run` outside of tox.

  - Expand tox.ini from 4 to 15 environments (lint, format, typecheck, unit, integration, functional, e2e, fast, ollama, ci, coverage, coverage-html, pre-commit, dev, all)
  - Rewire all Makefile test/lint/format/dev targets to delegate to tox
  - Update .githooks/pre-commit to run `tox -e pre-commit`
  - Update .pre-commit-config.yaml to use tox instead of poetry run
  - Update CI workflow (lint + test jobs) to use `tox -e lint` and `tox -e ci` instead of ad-hoc pytest/black/isort invocations
  - Update CLAUDE.md to mandate tox usage and document all environments

  https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5

* refactor: modernize tox.ini for tox 4.x conventions

  - Replace `skipsdist = true` (tox 3 alias) with `no_package = true`
  - Use `poetry install --no-root --sync` for faster, cleaner dep installs

  https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5

* fix(ci): drop poetry install from lint/format tox envs

  Lint and format only need black, isort, and bandit — not the full project dependency tree. Override commands_pre to empty and use tox deps instead. Fixes CI failure where poetry is not on PATH.

  https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5

* fix(ci): remove poetry run wrapper from all tox commands

  Since commands_pre runs poetry install into the tox-managed venv, all tools (pytest, mypy, black, etc.) are already on the venv PATH. The poetry run wrapper is redundant and fails in CI where poetry may not be installed globally.

  https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5

* fix(ci): remove poetry dependency, align local and CI processes

  - Replace `poetry install` with `pip install -e ".[dev]"` in tox commands_pre so all envs work without poetry installed
  - Remove Poetry cache from GitHub Actions (only pip cache needed)
  - Rename pre-commit env to pre-push: runs lint + full CI suite (same checks as GitHub Actions, reports generated locally)
  - Update CLAUDE.md to reflect new pre-push workflow

  The local `tox -e pre-push` now runs the exact same lint + test + coverage checks as CI, so failures are caught before pushing.

  https://claude.ai/code/session_01MTUpqms1fgezZFrodGA8H5

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-03-10 15:54:09 -04:00
parent 2a5f317a12
commit 6303a77f6e
6 changed files with 227 additions and 97 deletions
--- a/42
+++ b/42
@@ -5,10 +5,7 @@
        cloud-deploy cloud-up cloud-down cloud-logs cloud-status cloud-update \
        logs-up logs-down logs-kibana

-PYTEST      := poetry run pytest
-UVICORN     := poetry run uvicorn
-SELF_TDD    := poetry run self-tdd
-PYTHON      := poetry run python
+TOX         := tox

 # ── Setup ─────────────────────────────────────────────────────────────────────

@@ -33,7 +30,7 @@ install-bigbrain:
 # ── Development ───────────────────────────────────────────────────────────────

 dev: nuke
-	PYTHONDONTWRITEBYTECODE=1 $(UVICORN) dashboard.app:app --reload --host 0.0.0.0 --port 8000
+	PYTHONDONTWRITEBYTECODE=1 $(TOX) -e dev

 # Kill anything on port 8000, stop Docker containers, clear stale state.
 # Safe to run anytime — idempotent, never errors out.
@@ -83,43 +80,40 @@ ip:
 	@echo ""

 watch:
-	$(SELF_TDD) watch --interval 60
+	poetry run self-tdd watch --interval 60

-# ── Testing ───────────────────────────────────────────────────────────────────
+# ── Testing (all via tox) ─────────────────────────────────────────────────────

 test:
-	$(PYTEST) tests/ -q --tb=short -n auto --dist worksteal
+	$(TOX) -e all

 test-unit:
-	$(PYTEST) tests -m "unit" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e unit

 test-integration:
-	$(PYTEST) tests -m "integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e integration

 test-functional:
-	$(PYTEST) tests -m "functional and not slow and not selenium" --tb=short -v -n0
+	$(TOX) -e functional

 test-e2e:
-	$(PYTEST) tests -m "e2e" --tb=short -v -n0
+	$(TOX) -e e2e

 test-fast:
-	$(PYTEST) tests -m "unit or integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e fast

 test-ci:
-	$(PYTEST) tests -m "not skip_ci" --tb=short --cov=src --cov-report=term-missing --cov-fail-under=73 -p no:xdist
+	$(TOX) -e ci

 test-cov:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=xml --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage

 test-cov-html:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=html --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage-html
 	@echo "✓ HTML coverage report: open htmlcov/index.html"

-# Full-stack functional test: spins up Ollama (CPU, qwen2.5:0.5b) + dashboard
-# in Docker and verifies real LLM chat end-to-end.
-# Override model: make test-ollama OLLAMA_TEST_MODEL=tinyllama
 test-ollama:
-	FUNCTIONAL_DOCKER=1 $(PYTEST) tests/functional/test_ollama_chat.py -v --tb=long -x
+	$(TOX) -e ollama

 # ── Docker test containers ───────────────────────────────────────────────────
 # Clean containers from cached images; source bind-mounted for fast iteration.
@@ -156,15 +150,13 @@ test-docker-down:
 # ── Code quality ──────────────────────────────────────────────────────────────

 lint:
-	$(PYTHON) -m black --check src tests --line-length=100
-	$(PYTHON) -m isort --check-only src tests --profile=black --line-length=100
+	$(TOX) -e lint

 format:
-	$(PYTHON) -m black src tests --line-length=100
-	$(PYTHON) -m isort src tests --profile=black --line-length=100
+	$(TOX) -e format

 type-check:
-	mypy src --ignore-missing-imports --no-error-summary
+	$(TOX) -e typecheck

 pre-commit-install:
 	pre-commit install