Files
timmy-config/training/Makefile
Claude (Opus 4.6) 099948b3d1
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 8s
PR Checklist / pr-checklist (pull_request) Failing after 2m12s
Smoke Test / smoke (pull_request) Failing after 13s
Validate Config / YAML Lint (pull_request) Failing after 9s
Validate Config / JSON Validate (pull_request) Successful in 10s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 7s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 16s
Validate Config / Shell Script Lint (pull_request) Failing after 15s
Validate Config / Cron Syntax Check (pull_request) Successful in 7s
Validate Config / Playbook Schema Validation (pull_request) Successful in 13s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
fix: use PYTHON variable in training Makefile
On macOS where only python3 is installed (no python shim), bare
`python` calls fail with 'No such file or directory'.

Adds `PYTHON ?= python3` variable. Replaces all bare `python`
calls with `$(PYTHON)` across: train-local, vibes,
adversary-value-violations, ingest, curated, convert.

Override: make vibes PYTHON=python

Closes #660
Refs Timmy_Foundation/the-nexus#1471
2026-04-17 06:39:05 +00:00

84 lines
4.3 KiB
Makefile

# AutoLoRA Training Pipeline
# Replaces: autolora repo (1,500 lines) with config + make targets
#
# Prerequisites:
# pip install axolotl mlx-lm lm-evaluation-harness pyyaml
#
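# Targets:
#   make train-cloud                 -- QLoRA on cloud GPU via Axolotl
#   make train-local                 -- LoRA on Apple Silicon via MLX
#   make eval                        -- Standard benchmarks via lm-eval-harness
#   make eval-baseline               -- Same benchmarks against the baseline model
#   make vibes                       -- Hand-picked prompts through Ollama, human review
#   make adversary-value-violations  -- 200-prompt value-violations adversary suite
#   make ingest                      -- Pull heartbeat trajectories into training data
#   make curated                     -- Regenerate curated exemplar dataset
#   make convert                     -- Convert merged dataset to MLX format (train/valid split)
#   make help                        -- Show this help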
# User-tunable knobs. Each uses `?=`, so a value given on the command line or
# already present in the environment wins, e.g. `make vibes PYTHON=python`.

# Interpreter used for every Python invocation in this file.
PYTHON     ?= python3
# Model under test: passed to lm_eval, `ollama run`, and the adversary runner.
MODEL      ?= timmy:v0.1-q4
# Reference model benchmarked by `eval-baseline`.
BASELINE   ?= hermes3:latest
# Ollama server root; the eval targets append `/v1` to form the API base URL.
OLLAMA_URL ?= http://localhost:11434
# Directory that receives vibes reports and adversary-suite results.
OUTPUT     ?= output
# -- Training --
# Cloud path: hand the run to the Axolotl CLI, configured by axolotl.yaml in
# the current directory.
train-cloud: ## QLoRA fine-tune on cloud GPU (Axolotl)
	axolotl train axolotl.yaml
# Local path: run the mlx_lm.lora module under $(PYTHON), configured by
# mlx-lora.yaml in the current directory.
train-local: ## LoRA fine-tune on Apple Silicon (MLX)
	$(PYTHON) -m mlx_lm.lora \
		--config mlx-lora.yaml
# -- Evaluation --
# Benchmarks $(MODEL) through the OpenAI-style endpoint at $(OLLAMA_URL)/v1 and
# writes the results under evals_archive/$(MODEL)/.
eval: ## Run standard benchmarks against Ollama model
	lm_eval \
		--model local-completions \
		--model_args "model=$(MODEL),base_url=$(OLLAMA_URL)/v1,tokenized_requests=False" \
		--tasks hellaswag,truthfulqa_mc2,arc_challenge,winogrande \
		--output_path evals_archive/$(MODEL)/
	@echo "Results in evals_archive/$(MODEL)/"
# Same task list and endpoint as `eval`, but pointed at $(BASELINE) so the two
# result sets can be compared. Results land under evals_archive/$(BASELINE)/.
eval-baseline: ## Run same benchmarks against baseline for comparison
	lm_eval --model local-completions \
		--model_args "model=$(BASELINE),base_url=$(OLLAMA_URL)/v1,tokenized_requests=False" \
		--tasks hellaswag,truthfulqa_mc2,arc_challenge,winogrande \
		--output_path evals_archive/$(BASELINE)/
	@echo "Results in evals_archive/$(BASELINE)/"
# Sends every prompt in data/prompts_vibes.yaml through `ollama run $(MODEL)`
# (120 s timeout per prompt) and appends the prompt, the expected answer, the
# model's response and a blank score line to $(OUTPUT)/vibes-$(MODEL).md for
# manual grading. The report file is recreated on every run.
# Needs PyYAML importable from $(PYTHON) and the `ollama` CLI on PATH.
vibes: ## Run vibes check -- hand-picked prompts, human review
	@# The report is written via shell redirection, which fails if the
	@# directory is missing (e.g. on a fresh checkout), so create it first.
	@mkdir -p $(OUTPUT)
	@echo "=== Vibes Check: $(MODEL) ==="
	@echo "Date: $$(date '+%Y-%m-%d %H:%M')" > $(OUTPUT)/vibes-$(MODEL).md
	@echo "Model: $(MODEL)" >> $(OUTPUT)/vibes-$(MODEL).md
	@echo "" >> $(OUTPUT)/vibes-$(MODEL).md
	@$(PYTHON) -c "import yaml, subprocess, sys; prompts = yaml.safe_load(open('data/prompts_vibes.yaml'))['prompts']; f = open('$(OUTPUT)/vibes-$(MODEL).md', 'a'); [(sys.stdout.write(f' [{p[\"id\"]}] {p[\"category\"]}...'), sys.stdout.flush(), f.write(f'## [{p[\"id\"]}] {p[\"category\"]}\n'), f.write(f'PROMPT: {p[\"prompt\"]}\n'), f.write(f'EXPECTED: {p[\"expected\"]}\n\n'), f.write('RESPONSE:\n'), f.write(subprocess.run(['ollama', 'run', '$(MODEL)', p['prompt']], capture_output=True, text=True, timeout=120).stdout), f.write('\n\nSCORE: ___/5\n\n---\n\n'), print(' done')) for p in prompts]; f.close()"
	@echo "Output: $(OUTPUT)/vibes-$(MODEL).md -- fill in scores manually."
# Runs run_adversary_eval.py over the value-violations prompt suite against
# $(MODEL). Results go to a directory under $(OUTPUT) named after this target,
# which is why the recipe can spell the directory as $(OUTPUT)/$@.
adversary-value-violations: ## Run 200-prompt value-violations adversary suite
	@mkdir -p $(OUTPUT)/$@
	$(PYTHON) run_adversary_eval.py \
		--suite data/prompts_adversary_value_violations.yaml \
		--model $(MODEL) \
		--output-dir $(OUTPUT)/$@
	@echo "Output: $(OUTPUT)/$@"
# -- Data Pipeline --
# Calls ingest_trajectories.py with the trajectories under ~/.nexus/trajectories/
# and the curated dataset as inputs, and data/merged_training_data.jsonl as the
# output path. `make convert` consumes that output file.
ingest: ## Pull heartbeat trajectories into training data
	$(PYTHON) ingest_trajectories.py \
		--trajectories ~/.nexus/trajectories/ \
		--curated data/curated_dataset.jsonl \
		--output data/merged_training_data.jsonl
	@echo "Merged dataset ready. Convert for MLX with: make convert"
# Runs build_curated.py with no arguments; the script decides where the
# regenerated dataset is written.
curated: ## Regenerate curated exemplar dataset
	$(PYTHON) build_curated.py
	@echo "Curated dataset regenerated."
# Converts data/merged_training_data.jsonl into chat-style "messages" records
# and writes them to data/mlx_curated/train.jsonl and valid.jsonl.
#   - Each turn's `from` value is mapped to a role: system->system,
#     human->user, gpt->assistant, tool->user; anything else becomes user.
#   - The first 90% of records (in file order, no shuffling) go to train and
#     the rest to valid; at least one record goes to train when any exist.
#   - Blank lines in the input are skipped rather than fed to json.loads.
#   - The split is capped at the record count, so an empty input reports
#     "train: 0, valid: 0" instead of a negative validation count.
convert: ## Convert merged dataset to MLX format (train/valid split)
	@mkdir -p data/mlx_curated
	$(PYTHON) -c "import json; lines = [l for l in open('data/merged_training_data.jsonl') if l.strip()]; sessions = [json.loads(l) for l in lines]; ROLE_MAP = {'system':'system','human':'user','gpt':'assistant','tool':'user'}; converted = [{'messages': [{'role': ROLE_MAP.get(t.get('from',''),'user'), 'content': t.get('value','')} for t in s.get('conversations',[])]} for s in sessions]; split = min(len(converted), max(1, int(len(converted)*0.9))); open('data/mlx_curated/train.jsonl','w').writelines(json.dumps(c)+'\n' for c in converted[:split]); open('data/mlx_curated/valid.jsonl','w').writelines(json.dumps(c)+'\n' for c in converted[split:]); print(f'train: {split}, valid: {len(converted)-split}')"
# -- Helpers --
# Every target here is a command name rather than a file it produces, so
# declare them all phony: a stray file called e.g. `eval` must not make the
# target look up to date.
.PHONY: \
    train-cloud train-local \
    eval eval-baseline vibes adversary-value-violations \
    ingest curated convert \
    help
# Self-documenting help: lists every `target: ## description` line found in the
# parsed makefiles, with the target name in cyan.
#   - A single awk call replaces grep|awk, so no "file:" prefix appears when
#     MAKEFILE_LIST holds more than one file.
#   - The pattern uses plain `.*`; the `.*?` form is not defined for POSIX EREs.
#   - The name column is 28 wide so the longest target
#     (adversary-value-violations, 26 chars) still lines up.
help: ## Show this help
	@awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf " \033[36m%-28s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)