diff --git a/.githooks/pre-commit b/.githooks/pre-commit
index 3cc1226e..f8219672 100755
--- a/.githooks/pre-commit
+++ b/.githooks/pre-commit
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Pre-commit hook: lint + test with a wall-clock limit.
+# Pre-commit hook: format + test via tox.
 # Blocks the commit if formatting, imports, or tests fail.
 # Current baseline: ~18s wall-clock. Limit set to 30s for headroom.
 #
@@ -7,30 +7,28 @@
 
 set -e
 
-echo "Auto-formatting (black + isort)..."
-poetry run black --line-length 100 src/ tests/ --quiet
-poetry run isort --profile black --line-length 100 src/ tests/ --quiet 2>/dev/null
-git add -u
-
 MAX_SECONDS=30
 
-echo "Running tests (${MAX_SECONDS}s limit)..."
+echo "Running pre-commit gate via tox (${MAX_SECONDS}s limit)..."
 
 # macOS lacks GNU timeout; use perl as a portable fallback.
 if command -v timeout &>/dev/null; then
-    timeout "${MAX_SECONDS}" poetry run pytest tests -q --tb=short --timeout=10
+    timeout "${MAX_SECONDS}" tox -e pre-commit
 else
-    perl -e "alarm ${MAX_SECONDS}; exec @ARGV" -- poetry run pytest tests -q --tb=short --timeout=10
+    perl -e "alarm ${MAX_SECONDS}; exec @ARGV" -- tox -e pre-commit
 fi
 exit_code=$?
 
+# Re-stage any files that black/isort reformatted
+git add -u
+
 if [ "$exit_code" -eq 142 ] || [ "$exit_code" -eq 124 ]; then
     echo ""
-    echo "BLOCKED: tests exceeded ${MAX_SECONDS}s wall-clock limit."
+    echo "BLOCKED: pre-commit gate exceeded ${MAX_SECONDS}s wall-clock limit."
     echo "Speed up slow tests before committing."
     exit 1
 elif [ "$exit_code" -ne 0 ]; then
     echo ""
-    echo "BLOCKED: tests failed."
+    echo "BLOCKED: pre-commit gate failed."
     exit 1
 fi
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ab4042a8..b8bb30b9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,17 +16,11 @@ jobs:
         with:
           python-version: "3.11"
 
-      - name: Install linters
-        run: pip install black==26.3.0 isort==5.13.2 bandit==1.8.0
+      - name: Install tox
+        run: pip install tox
 
-      - name: Check formatting (black)
-        run: black --check --line-length 100 src/ tests/
-
-      - name: Check import order (isort)
-        run: isort --check --profile black --line-length 100 src/ tests/
-
-      - name: Security scan (bandit)
-        run: bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+      - name: Lint (black + isort + bandit via tox)
+        run: tox -e lint
 
   test:
     runs-on: ubuntu-latest
@@ -45,31 +39,18 @@ jobs:
         with:
           python-version: "3.11"
 
-      - name: Cache Poetry virtualenv
+      - name: Cache pip
         uses: actions/cache@v4
         with:
-          path: |
-            ~/.cache/pypoetry
-            ~/.cache/pip
-          key: poetry-${{ hashFiles('poetry.lock') }}
-          restore-keys: poetry-
+          path: ~/.cache/pip
+          key: pip-${{ hashFiles('poetry.lock') }}
+          restore-keys: pip-
 
-      - name: Install dependencies
-        run: |
-          pip install poetry
-          poetry install --with dev
+      - name: Install tox
+        run: pip install tox
 
-      - name: Run tests
-        run: |
-          mkdir -p reports
-          poetry run pytest \
-            --cov=src \
-            --cov-report=term-missing \
-            --cov-report=xml:reports/coverage.xml \
-            --cov-fail-under=73 \
-            --junitxml=reports/junit.xml \
-            -p no:xdist \
-            -m "not ollama and not docker and not selenium and not external_api"
+      - name: Run tests (via tox)
+        run: tox -e ci
 
       # Posts a check annotation + PR comment showing pass/fail counts.
       # Visible in the GitHub mobile app under Checks and in PR conversations.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1d41b984..ebd9139e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -51,25 +51,16 @@ repos:
         exclude: ^tests/
         stages: [manual]
 
-  # Unit tests only with 30-second wall-clock limit.
-  # Runs only fast unit tests on commit; full suite runs in CI.
+  # Format + unit tests via tox (30s wall-clock limit).
+  # Runs tox pre-commit env; full suite runs in CI.
   - repo: local
     hooks:
-      - id: pytest-fast
-        name: pytest unit (30s limit)
-        entry: timeout 30 poetry run pytest
+      - id: tox-pre-commit
+        name: tox pre-commit (format + unit tests)
+        entry: timeout 30 tox -e format,unit
         language: system
         types: [python]
         stages: [pre-commit]
         pass_filenames: false
         always_run: true
-        args:
-          - tests
-          - -q
-          - --tb=short
-          - --timeout=10
-          - -p
-          - no:xdist
-          - --ignore=tests/e2e
-          - --ignore=tests/functional
         verbose: true
diff --git a/CLAUDE.md b/CLAUDE.md
index 42524173..0ddcf022 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -44,13 +44,45 @@ New routes: `src/dashboard/routes/<name>.py` → register in `src/dashboard/app.
 
 ---
 
-## Testing
+## Development Environments (tox)
+
+**tox is the single source of truth for all Python environments.**
+Never run `poetry run` directly — always go through tox. All environment
+config (deps, markers, flags) lives in `tox.ini`. The Makefile and CI
+both delegate to tox.
+
+### Quick reference
 
 ```bash
-make test               # Quick run (no Ollama needed)
-make test-cov           # With coverage (term-missing + XML)
+tox -e unit             # Fast unit tests (default pre-commit gate)
+tox -e ci               # Full CI suite with coverage + JUnit XML
+tox -e lint             # black --check + isort --check + bandit
+tox -e format           # Auto-format (black + isort)
+tox -e dev              # Start dashboard with auto-reload
 ```
 
+### All tox environments
+
+| Environment | Purpose |
+|---|---|
+| `lint` | Check formatting + imports + security |
+| `format` | Auto-format code |
+| `typecheck` | mypy static analysis |
+| `unit` | Fast unit tests, parallel |
+| `integration` | Integration tests, parallel |
+| `functional` | Functional tests, sequential |
+| `e2e` | End-to-end tests |
+| `fast` | unit + integration combined |
+| `ollama` | Live LLM tests (requires Ollama) |
+| `ci` | Coverage + JUnit XML (mirrors GitHub Actions) |
+| `coverage` | Coverage terminal + XML |
+| `coverage-html` | Coverage HTML report |
+| `pre-push` | Lint + full CI suite (mirrors GitHub Actions exactly) |
+| `dev` | uvicorn with auto-reload |
+| `all` | All tests, parallel |
+
+### Testing notes
+
 - **Stubs in conftest:** `agno`, `airllm`, `pyttsx3`, `telegram`, `discord`
   stubbed via `sys.modules.setdefault()` — tests run without those packages
 - **Test mode:** `TIMMY_TEST_MODE=1` set automatically in conftest
@@ -62,7 +94,7 @@ make test-cov           # With coverage (term-missing + XML)
 
 ## Key Conventions
 
-1. **Tests must stay green.** Run `make test` before committing.
+1. **Tests must stay green.** Run `tox -e unit` before committing. Run `tox -e pre-push` before pushing (mirrors CI exactly).
 2. **No cloud AI dependencies.** All inference on localhost.
 3. **Keep the root directory clean.** No new top-level files without purpose.
 4. **Follow existing patterns** — singletons, graceful degradation, pydantic config.
@@ -71,6 +103,7 @@ make test-cov           # With coverage (term-missing + XML)
 7. **Keep routes thin** — business logic lives in the module, not the route.
 8. **Prefer editing existing files** over creating new ones.
 9. **Use `from config import settings`** for all env-var access.
+10. **Use tox for everything.** Never run `poetry run` directly — use `tox -e <env>`.
 
 ---
 
diff --git a/Makefile b/Makefile
index a9a8477a..8d5f059e 100644
--- a/Makefile
+++ b/Makefile
@@ -5,10 +5,7 @@
         cloud-deploy cloud-up cloud-down cloud-logs cloud-status cloud-update \
         logs-up logs-down logs-kibana
 
-PYTEST      := poetry run pytest
-UVICORN     := poetry run uvicorn
-SELF_TDD    := poetry run self-tdd
-PYTHON      := poetry run python
+TOX         := tox
 
 # ── Setup ─────────────────────────────────────────────────────────────────────
 
@@ -33,7 +30,7 @@ install-bigbrain:
 # ── Development ───────────────────────────────────────────────────────────────
 
 dev: nuke
-	PYTHONDONTWRITEBYTECODE=1 $(UVICORN) dashboard.app:app --reload --host 0.0.0.0 --port 8000
+	PYTHONDONTWRITEBYTECODE=1 $(TOX) -e dev
 
 # Kill anything on port 8000, stop Docker containers, clear stale state.
 # Safe to run anytime — idempotent, never errors out.
@@ -83,43 +80,40 @@ ip:
 	@echo ""
 
 watch:
-	$(SELF_TDD) watch --interval 60
+	poetry run self-tdd watch --interval 60
 
-# ── Testing ───────────────────────────────────────────────────────────────────
+# ── Testing (all via tox) ─────────────────────────────────────────────────────
 
 test:
-	$(PYTEST) tests/ -q --tb=short -n auto --dist worksteal
+	$(TOX) -e all
 
 test-unit:
-	$(PYTEST) tests -m "unit" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e unit
 
 test-integration:
-	$(PYTEST) tests -m "integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e integration
 
 test-functional:
-	$(PYTEST) tests -m "functional and not slow and not selenium" --tb=short -v -n0
+	$(TOX) -e functional
 
 test-e2e:
-	$(PYTEST) tests -m "e2e" --tb=short -v -n0
+	$(TOX) -e e2e
 
 test-fast:
-	$(PYTEST) tests -m "unit or integration" --tb=short -v -n auto --dist worksteal
+	$(TOX) -e fast
 
 test-ci:
-	$(PYTEST) tests -m "not skip_ci" --tb=short --cov=src --cov-report=term-missing --cov-fail-under=73 -p no:xdist
+	$(TOX) -e ci
 
 test-cov:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=xml --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage
 
 test-cov-html:
-	$(PYTEST) tests/ --cov=src --cov-report=term-missing --cov-report=html --cov-fail-under=73 -q -p no:xdist
+	$(TOX) -e coverage-html
 	@echo "✓ HTML coverage report: open htmlcov/index.html"
 
-# Full-stack functional test: spins up Ollama (CPU, qwen2.5:0.5b) + dashboard
-# in Docker and verifies real LLM chat end-to-end.
-# Override model: make test-ollama OLLAMA_TEST_MODEL=tinyllama
 test-ollama:
-	FUNCTIONAL_DOCKER=1 $(PYTEST) tests/functional/test_ollama_chat.py -v --tb=long -x
+	$(TOX) -e ollama
 
 # ── Docker test containers ───────────────────────────────────────────────────
 # Clean containers from cached images; source bind-mounted for fast iteration.
@@ -156,15 +150,13 @@ test-docker-down:
 # ── Code quality ──────────────────────────────────────────────────────────────
 
 lint:
-	$(PYTHON) -m black --check src tests --line-length=100
-	$(PYTHON) -m isort --check-only src tests --profile=black --line-length=100
+	$(TOX) -e lint
 
 format:
-	$(PYTHON) -m black src tests --line-length=100
-	$(PYTHON) -m isort src tests --profile=black --line-length=100
+	$(TOX) -e format
 
 type-check:
-	mypy src --ignore-missing-imports --no-error-summary
+	$(TOX) -e typecheck
 
 pre-commit-install:
 	pre-commit install
diff --git a/tox.ini b/tox.ini
index 86f09f5a..4b32e722 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,23 +1,158 @@
 [tox]
-envlist = unit, integration
-skipsdist = true
+envlist = lint, unit, integration
+no_package = true
 
+# ── Base ─────────────────────────────────────────────────────────────────────
 [testenv]
-allowlist_externals = poetry
-commands_pre = poetry install --with dev --quiet
+allowlist_externals = timeout, perl, docker, mkdir
+commands_pre = pip install -e ".[dev]" --quiet
+
+setenv =
+    TIMMY_TEST_MODE = 1
+    TIMMY_DISABLE_CSRF = 1
+    TIMMY_SKIP_EMBEDDINGS = 1
+
+# ── Lint & Format ────────────────────────────────────────────────────────────
+# Linter versions are pinned so check/format results are reproducible on every machine and in CI.
+[testenv:lint]
+description = Check formatting (black), import order (isort), security (bandit)
+commands_pre =
+deps =
+    black==26.3.0
+    isort==5.13.2
+    bandit==1.8.0
+commands =
+    black --check --line-length 100 src/ tests/
+    isort --check-only --profile black --line-length 100 src/ tests/
+    bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+
+[testenv:format]
+description = Auto-format code with black + isort
+commands_pre =
+deps =
+    black==26.3.0
+    isort==5.13.2
+commands =
+    black --line-length 100 src/ tests/
+    isort --profile black --line-length 100 src/ tests/
+
+[testenv:typecheck]
+description = Static type checking with mypy
+commands =
+    mypy src --ignore-missing-imports --no-error-summary
+
+# ── Test Environments ────────────────────────────────────────────────────────
 
 [testenv:unit]
-description = Fast unit tests (no I/O, no external services)
-commands = poetry run pytest tests/ -q --tb=short -m "unit and not integration and not ollama and not docker and not selenium and not external_api"
+description = Fast tests — excludes e2e, functional, and external services
+commands =
+    pytest tests/ -q --tb=short \
+        --ignore=tests/e2e \
+        --ignore=tests/functional \
+        -m "not ollama and not docker and not selenium and not external_api and not skip_ci" \
+        -n auto --dist worksteal
 
 [testenv:integration]
-description = Integration tests (may use SQLite, but no external services)
-commands = poetry run pytest tests/ -q --tb=short -m "integration and not unit and not ollama and not docker and not selenium and not external_api"
+description = Integration tests (marked with @pytest.mark.integration)
+commands =
+    pytest tests/ -q --tb=short \
+        -m "integration and not ollama and not docker and not selenium and not external_api" \
+        -n auto --dist worksteal
+
+[testenv:functional]
+description = Functional tests — real HTTP, no mocking (excl slow + selenium)
+commands =
+    pytest tests/functional/ -q --tb=short -n0 \
+        -m "not slow and not selenium"
+
+[testenv:e2e]
+description = End-to-end tests — full system, may be slow
+commands =
+    pytest tests/e2e/ -q --tb=short -n0
+
+[testenv:fast]
+description = All tests except e2e, functional, and external
+commands =
+    pytest tests/ -q --tb=short \
+        --ignore=tests/e2e \
+        --ignore=tests/functional \
+        -m "not ollama and not docker and not selenium and not external_api" \
+        -n auto --dist worksteal
 
 [testenv:ollama]
-description = Live LLM tests via Ollama (requires Ollama running with a tiny model)
-commands = poetry run pytest tests/ -q --tb=short -m ollama --timeout=120
+description = Live LLM tests via Ollama (requires running Ollama)
+commands =
+    pytest tests/ -q --tb=short -m ollama --timeout=120
+
+# ── CI / Coverage ────────────────────────────────────────────────────────────
+
+[testenv:ci]
+description = CI test suite with coverage + JUnit XML (mirrors GitHub Actions)
+commands =
+    mkdir -p reports
+    pytest tests/ \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml:reports/coverage.xml \
+        --cov-fail-under=73 \
+        --junitxml=reports/junit.xml \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
 
 [testenv:coverage]
-description = Run all tests with coverage reporting
-commands = poetry run pytest tests/ -q --tb=short --cov=src --cov-report=term-missing --cov-fail-under=73 -p no:xdist -m "not ollama and not docker and not selenium and not external_api"
+description = Full coverage report (terminal + XML)
+commands =
+    pytest tests/ -q --tb=short \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml \
+        --cov-fail-under=73 \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+[testenv:coverage-html]
+description = Coverage with HTML report
+commands =
+    pytest tests/ -q --tb=short \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=html \
+        --cov-fail-under=73 \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+# ── Pre-push (mirrors CI exactly) ────────────────────────────────────────────
+# Linters are pinned to the versions CI installed before the tox migration, for reproducible checks.
+[testenv:pre-push]
+description = Local gate — lint + full CI suite (same as GitHub Actions)
+deps =
+    black==26.3.0
+    isort==5.13.2
+    bandit==1.8.0
+commands =
+    black --check --line-length 100 src/ tests/
+    isort --check-only --profile black --line-length 100 src/ tests/
+    bandit -r src/ -ll -s B101,B104,B307,B310,B324,B601,B608 -q
+    mkdir -p reports
+    pytest tests/ \
+        --cov=src \
+        --cov-report=term-missing \
+        --cov-report=xml:reports/coverage.xml \
+        --cov-fail-under=73 \
+        --junitxml=reports/junit.xml \
+        -p no:xdist \
+        -m "not ollama and not docker and not selenium and not external_api"
+
+# ── Dev Server ───────────────────────────────────────────────────────────────
+# setenv is overridden here so the test-only TIMMY_* flags from [testenv] do not reach the dev server.
+[testenv:dev]
+description = Start dashboard with auto-reload (local development)
+setenv = PYTHONDONTWRITEBYTECODE = 1
+commands = uvicorn dashboard.app:app --reload --host 0.0.0.0 --port 8000
+
+# ── All Tests (parallel) ─────────────────────────────────────────────────────
+
+[testenv:all]
+description = Run all tests in parallel
+commands =
+    pytest tests/ -q --tb=short -n auto --dist worksteal