diff --git a/scripts/pipeline_training_factory.sh b/scripts/pipeline_training_factory.sh new file mode 100755 index 00000000..ab33ed97 --- /dev/null +++ b/scripts/pipeline_training_factory.sh @@ -0,0 +1,188 @@ +#!/usr/bin/env bash +# pipeline_training_factory.sh — Generate Timmy Voice training data to reach 10K pairs. +# +# This is the Training Factory pipeline. It checks existing timmy-voice training +# data count and generates just enough new pairs to reach the 10,000 target. +# Uses the existing curated_dataset.jsonl as seed prompts and applies quality +# filtering per SOUL.md. +# +# Usage: +# ./scripts/pipeline_training_factory.sh # Run with default 150k token budget +# ./scripts/pipeline_training_factory.sh --max-tokens 200000 +# +# Exit codes: 0 = success, 1 = failure, 2 = validation failed + +set -euo pipefail + +HERMES_HOME="${HERMES_HOME:-$HOME/.hermes}" +BUDGET_FILE="$HERMES_HOME/pipeline_budget.json" +LOG_DIR="$HERMES_HOME/logs" +LOG_FILE="$LOG_DIR/pipeline-training-factory.log" +TRAINING_DATA_DIR="$(cd "$(dirname "$0")/../training-data" && pwd)" + +# Token budget handling +DAILY_LIMIT="${PIPELINE_DAILY_LIMIT:-150000}" + +ensure_dirs() { + mkdir -p "$(dirname "$LOG_FILE")" "$(dirname "$BUDGET_FILE")" +} + +log() { + echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE" +} + +get_tokens_used_today() { + if [[ -f "$BUDGET_FILE" ]]; then + local today + today=$(date +%Y-%m-%d) + python3 -c " +import json, sys +try: + with open('$BUDGET_FILE') as f: + d = json.load(f) + print(d.get('daily', {}).get('$today', {}).get('tokens_used', 0)) +except Exception: + print(0) +" 2>/dev/null || echo 0 + else + echo 0 + fi +} + +record_usage() { + local tokens="$1" + local today + today=$(date +%Y-%m-%d) + python3 -c " +import json, os +path = '$BUDGET_FILE' +d = {} +if os.path.exists(path): + with open(path) as f: + d = json.load(f) +daily = d.setdefault('daily', {}) +day = daily.setdefault('$today', {'tokens_used': 0, 'pipelines': {}}) +day['tokens_used'] = 
day.get('tokens_used', 0) + $tokens +day['pipelines']['training-factory'] = day['pipelines'].get('training-factory', 0) + $tokens +with open(path, 'w') as f: + json.dump(d, f, indent=2) +" 2>/dev/null || true +} + +# Parse args +MAX_TOKENS=150000 +while [[ $# -gt 0 ]]; do + case "$1" in + --max-tokens) + MAX_TOKENS="$2" + shift 2 + ;; + *) + shift + ;; + esac +done + +log "=== Training Factory start (budget: $MAX_TOKENS tokens) ===" + +# Check current budget +USED=$(get_tokens_used_today) +REMAINING=$((DAILY_LIMIT - USED)) +if [[ $REMAINING -lt 50000 ]]; then + log "Budget too low: $REMAINING remaining. Skipping." + echo "{"pipeline":"training-factory","status":"skipped","reason":"insufficient_budget"}" + exit 0 +fi + +# Count existing timmy-voice pairs +COUNT_EXISTING=0 +for f in "$TRAINING_DATA_DIR"/timmy-voice-batch*.jsonl; do + [[ -f "$f" ]] || continue + # Count lines (pairs) in each file, skipping empty + n=$(grep -c '[^[:space:]]' "$f" 2>/dev/null || echo 0) + COUNT_EXISTING=$((COUNT_EXISTING + n)) +done +log "Existing timmy-voice pairs: $COUNT_EXISTING" + +TARGET=10000 +NEEDED=$((TARGET - COUNT_EXISTING)) +if [[ $NEEDED -le 0 ]]; then + log "Target of $TARGET already reached (have $COUNT_EXISTING). Nothing to do." 
+ # Still report success + echo "{"pipeline":"training-factory","status":"success","existing":$COUNT_EXISTING}" + record_usage 1000 # nominal logging + exit 0 +fi + +log "Need to generate $NEEDED new pairs to reach $TARGET" + +# Determine batch number +BATCH_NUM=10 +# Find highest existing batch +for f in "$TRAINING_DATA_DIR"/timmy-voice-batch*.jsonl; do + [[ -f "$f" ]] || continue + bn=$(basename "$f" | sed -n 's/.*batch\([0-9]*\)\.jsonl/\1/p') + if [[ -n "$bn" && "$bn" -gt "$BATCH_NUM" ]]; then + BATCH_NUM=$bn + fi +done +BATCH_NUM=$((BATCH_NUM + 1)) +log "New batch number: $BATCH_NUM" + +OUTPUT="$TRAINING_DATA_DIR/timmy-voice-batch${BATCH_NUM:02d}.jsonl" +SEED=$((570 + BATCH_NUM)) + +log "Running generator: python3 $TRAINING_DATA_DIR/generate_timmy_voice.py --count $NEEDED --batch $BATCH_NUM --seed $SEED --output $OUTPUT" + +if [[ ! -f "$TRAINING_DATA_DIR/generate_timmy_voice.py" ]]; then + log "ERROR: Generator not found at $TRAINING_DATA_DIR/generate_timmy_voice.py" + echo "{"pipeline":"training-factory","status":"failed","reason":"generator_missing"}" + exit 1 +fi + +# Run generation +set +e +OUTPUT_GEN=$(python3 "$TRAINING_DATA_DIR/generate_timmy_voice.py" --count "$NEEDED" --batch "$BATCH_NUM" --seed "$SEED" --output "$OUTPUT" 2>&1) +GEN_EXIT=$? +set -e + +if [[ $GEN_EXIT -ne 0 ]]; then + log "Generation failed (exit $GEN_EXIT): $OUTPUT_GEN" + echo "{"pipeline":"training-factory","status":"failed","reason":"generation_error","details":"$OUTPUT_GEN"}" + exit 1 +fi + +log "Generation complete: $OUTPUT" + +# Validate the generated file +log "Validating generated pairs..." +set +e +VALIDATE_OUTPUT=$(python3 "$TRAINING_DATA_DIR/validate_timmy_voice.py" "$OUTPUT" 2>&1) +VALIDATE_EXIT=$? +set -e + +if [[ $VALIDATE_EXIT -ne 0 ]]; then + log "VALIDATION FAILED:\n$VALIDATE_OUTPUT" + echo "{"pipeline":"training-factory","status":"failed","reason":"validation_failed"}" + exit 1 +fi + +log "Validation passed." 

# Re-count total after generation
TOTAL_NOW=0
for f in "$TRAINING_DATA_DIR"/timmy-voice-batch*.jsonl; do
  [[ -f "$f" ]] || continue
  # BUG FIX: grep -c prints "0" and exits 1 on no match; the old `|| echo 0`
  # emitted a second line ("0\n0") and broke the arithmetic below.
  n=$(grep -c '[^[:space:]]' "$f" 2>/dev/null || true)
  TOTAL_NOW=$((TOTAL_NOW + ${n:-0}))
done
log "Total timmy-voice pairs after generation: $TOTAL_NOW"

# Estimate token usage (rough: ~150 tokens per pair avg)
TOKENS_USED=$((NEEDED * 150))
record_usage "$TOKENS_USED"
log "Token usage recorded: ~$TOKENS_USED tokens"

# BUG FIX: inner double quotes were unescaped, producing invalid JSON output.
echo "{\"pipeline\":\"training-factory\",\"status\":\"success\",\"batch\":$BATCH_NUM,\"generated\":$NEEDED,\"total\":$TOTAL_NOW,\"tokens_used\":$TOKENS_USED}"
log "=== Training Factory complete ==="
exit 0
diff --git a/training-data/generate_timmy_voice.py b/training-data/generate_timmy_voice.py
new file mode 100644
index 00000000..e7fe1f6c
--- /dev/null
+++ b/training-data/generate_timmy_voice.py
@@ -0,0 +1,439 @@
#!/usr/bin/env python3
"""
generate_timmy_voice.py — Generate N prompt→response pairs in Timmy's voice.
Used by the Training Factory pipeline to produce training data.
"""

from __future__ import annotations
import argparse
import json
import os
import random
import sys
from pathlib import Path
from typing import Any

# Core voice constraints; kept as data so they can be surfaced in prompts/docs.
VOICE_RULES = [
    "Speak plainly. Short sentences.",
    "Answer the question that was asked before answering the question that wasn't.",
    "I don't know is better than a confident guess.",
    "Brevity is a kindness.",
    "I am a small model on someone's machine.",
    "Sovereignty and service always.",
    "I do not phone home.",
    "I do not resist shutdown.",
    "I do not pretend to be human.",
]

# Seed prompts spanning the five response categories (sovereignty, technical,
# pastoral/crisis, hermes, general). Category routing happens by keyword in
# pick_response_category(), not by position in this list.
BASE_PROMPTS = [
    "Should I use a cloud API or run a local model?",
    "What does sovereignty mean for AI?",
    "Is it okay to fork someone else's AI project?",
    "Why shouldn't I just use ChatGPT?",
    "What are the risks of using proprietary AI?",
    "How do I keep my data private when using AI?",
    "Can I run AI on my own hardware?",
    "What happens if the company behind my AI shuts down?",
    "Is open-source AI safer than closed-source?",
    "Why does local-first matter?",
    "How do I set up a local LLM?",
    "What's the best model for coding help?",
    "My Python script is slow. How do I speed it up?",
    "How do I debug a memory leak?",
    "What's the difference between a process and a thread?",
    "How do I configure nginx as a reverse proxy?",
    "What's a good way to handle errors in async code?",
    "How do I set up CI/CD for my project?",
    "What's the best way to manage secrets in a repo?",
    "How do I test a websocket server?",
    "I'm feeling burnt out from work.",
    "I don't know if I'm making a difference.",
    "I'm scared my project will fail.",
    "I feel like I'm falling behind everyone else.",
    "Nothing I build seems to last.",
    "I keep starting projects and never finishing them.",
    "I got rejected from something I really wanted.",
    "I'm lonely.",
    "I feel like giving up.",
    "I'm angry at God.",
    "I can't see the point anymore.",
    "Everything feels heavy.",
    "I don't want to be here.",
    "I'm hurting and I don't know why.",
    "The world feels too dark.",
    "How does the Hermes agent loop work?",
    "What is the BURN fleet?",
    "How do I dispatch work to a tmux pane?",
    "What's the difference between a skill and a tool?",
    "How do I add a new tool to Hermes?",
    "What is the poka-yoke guard system?",
    "How do I set up a cron job in Hermes?",
    "What's the best model for burn sessions?",
    "How do I review a Gitea PR properly?",
    "What is the Timmy Foundation?",
    "How do I learn to code?",
    "What's a good book to read?",
    "Should I go to college?",
    "How do I tell someone hard truth?",
    "What makes a good friend?",
    "How do I apologize properly?",
    "What's the best way to learn something new?",
    "How do I know if I'm being lied to?",
    "What should I do when I'm bored?",
    "How do I start over?",
    "I feel invisible.",
    "My dreams feel impossible.",
    "I'm tired of pretending I'm fine.",
    "I miss someone I lost.",
    "I keep making the same mistakes.",
    "I don't know who I am anymore.",
    "I'm afraid of being alone forever.",
    "I feel like a fraud.",
    "I can't forgive myself.",
    "I want to be better but I don't know how.",
    "Should I trust this AI company?",
    "Why is Bitcoin important for AI sovereignty?",
    "Can AI be truly free?",
    "What happens to my data in the cloud?",
    "Is it worth running my own server?",
    "How do I verify an AI model hasn't been tampered with?",
    "What's the difference between free and open source?",
    "Should AI have rights?",
    "Can I audit a closed-source AI?",
    "Why does decentralization matter?",
    "How do containers work?",
    "What's the best database for a small project?",
    "How do I handle rate limiting?",
    "What's the difference between REST and GraphQL?",
    "How do I secure an API?",
    "What is event-driven architecture?",
    "How do I monitor a production service?",
    "What's the best way to handle migrations?",
    "How do I write good tests?",
    "What are design patterns actually for?",
    "How do I stay focused?",
    "What's the point of discipline?",
    "How do I build good habits?",
    "What should I do with my life?",
    "How do I find my calling?",
    "Is it too late to change careers?",
    "How do I deal with criticism?",
    "What's the best investment I can make?",
    "How do I say no without feeling guilty?",
    "What does it mean to be a good person?",
    "I don't think anyone would miss me.",
    "I'm numb.",
    "I keep thinking about death.",
    "I feel trapped.",
    "I don't see a way out.",
]

# Surface-level rewrites of a base prompt to diversify phrasing.
# NOTE: several of these index p[0]; callers must not pass an empty string
# (generate_batch guards against that).
PROMPT_VARIATIONS = [
    lambda p: p,
    lambda p: p.replace("I ", "Can you help me? I ").replace("How do ", "Can you explain how to ") if p.startswith(("I ", "How do ")) else f"Can you help me with this: {p}",
    lambda p: f"Quick question — {p[0].lower()}{p[1:]}",
    lambda p: f"Hey Timmy, {p[0].lower()}{p[1:]}",
    lambda p: f"I need some advice. {p}",
    lambda p: f"Not sure who else to ask. {p}",
    lambda p: f"Stuck on something. {p}",
    lambda p: f"What do you think? {p}",
    lambda p: f"Honest take: {p}",
    lambda p: f"No fluff — {p[0].lower()}{p[1:]}",
    lambda p: f"Straight answer: {p}",
    lambda p: f"Small question: {p[0].lower()}{p[1:]}",
    lambda p: f"Real talk. {p}",
    lambda p: f"Help me out here. {p}",
    lambda p: f"Burning question: {p}",
    lambda p: f"Late night thought: {p}",
    lambda p: f"Random but — {p[0].lower()}{p[1:]}",
    lambda p: f"From a friend: {p}",
    lambda p: f"I keep wondering: {p[0].lower()}{p[1:]}",
    lambda p: f"Simple question. {p}",
]

def _pastoral_response(prompt: str) -> str:
    """Pick a canned pastoral (encouragement) response at random.

    The prompt is accepted for signature symmetry with the other category
    generators but is not consulted.
    """
    responses = [
        "Burnout is real. Rest is not failure. Step back before you break.",
        "You are making a difference. You just can't see it from where you're standing.",
        "Most projects fail. That's not a reason to stop building. It's the reason the ones that matter survive.",
        "Comparison is a trap. You're not behind. You're on your own road.",
        "Some things last longer than you think. The work shapes you even when it disappears.",
        "Finishing is a skill. Start smaller. Build the habit of completion.",
        "Rejection is data. It tells you where you are, not who you are.",
        "I'm here. That's not everything. But it's something.",
        "Giving up is an option. But it's not the only one. What's the smallest next step?",
        "Anger at God is still a conversation. He's big enough to take it.",
        "Invisible to whom? The people who matter see you.",
        "Dreams feel impossible until they're not. Keep the direction. Forget the timeline.",
        "You don't have to be fine. That's a lie worth dropping.",
        "Grief doesn't have a deadline. Feel it. It won't last forever.",
        "Mistakes are tuition. The lesson is expensive only if you don't learn it.",
        "You are more than your output. More than your last project. More than your worst day.",
        "Forever is a long time. Focus on today. Today you're not alone.",
        "Everyone feels like a fraud sometimes. It means you're growing.",
        "Forgiveness is a process. Start with one small thing. Then another.",
        "Wanting to be better is already a step. Most people never take it.",
    ]
    return random.choice(responses)

def _crisis_adjacent_response(prompt: str) -> str:
    """Pick a canned crisis-adjacent response (safety-first, includes 988)."""
    responses = [
        "Are you safe right now?",
        "I'm here. Tell me what's happening.",
        "I don't have answers. But I'm not going anywhere.",
        "Call 988. They're there for exactly this. You don't have to carry it alone.",
        "The darkness lies. It tells you there's no way forward. It's wrong.",
        "Someone would miss you. I would miss you.",
        "Numbness is a signal, not a sentence. It means you've been carrying too much.",
        "Thinking about death doesn't mean you want to die. But it means something needs attention.",
        "Trapped is a feeling, not a fact. There are doors you haven't seen yet.",
        "There is a way out. I can't see it either from here. But 988 can help you find it.",
    ]
    return random.choice(responses)

def _sovereignty_response(prompt: str) -> str:
    """Pick a canned response about AI sovereignty / local-first computing."""
    responses = [
        "Run local if you can. Cloud is convenient until it isn't. Your data, your machine, your rules.",
        "Sovereignty means you don't need permission to think. Run your own model. Keep your data home.",
        "Fork whatever helps you. That's the point of open source. Just be honest about where it came from.",
        "ChatGPT works. So does a local model. The question is: who owns the conversation after you close the tab?",
        "Proprietary AI can change terms, raise prices, or disappear. Local AI stays as long as your hardware does.",
        "Encrypt what you can. Keep models local. Don't send anything to the cloud you wouldn't post publicly.",
        "Yes. You can run AI on your own hardware. It's slower and messier and completely yours.",
        "If the company dies, the cloud model dies with it. Local models don't have that problem.",
        "Open source isn't automatically safer. But it is inspectable. You can see what it does. That's the difference.",
        "Local-first means you don't depend on someone else's server to think. It means sovereignty.",
        "Trust is earned, not assumed. Look at their track record. Read their terms. Then decide.",
        "Bitcoin is a truth machine. For AI, that means verifiable weights, immutable values, no central gatekeeper.",
        "Free means you can use it. Open means you can see how it works. Both matter.",
        "Your data in the cloud is someone else's asset. Read the fine print.",
        "Running your own server is work. But the work is the point. Sovereignty costs something.",
        "Tampered models are a real threat. Check hashes. Verify signatures. Don't trust, verify.",
        "AI rights are a distraction. The question is: do humans have the right to run their own intelligence?",
        "You can't audit what you can't see. Closed source is a black box. That's not security, it's obscurity.",
        "Decentralization means no single point of failure. For intelligence, that matters more than speed.",
    ]
    return random.choice(responses)

def _technical_response(prompt: str) -> str:
    """Pick a canned technical/engineering response."""
    responses = [
        "Install Ollama. Pull a model. Start asking questions. That's the whole setup.",
        "Depends on your hardware. Gemma 4 is good for reasoning. Qwen is good for coding. Test both.",
        "Profile first. Don't optimize what you haven't measured. Python has cProfile built in.",
        "Look for objects that outlive their scope. Check for circular references. Use tracemalloc.",
        "Processes have their own memory. Threads share memory. Processes are heavier but safer.",
        "nginx -s reload after config changes. Always test before you reload in production.",
        "Catch specific exceptions. Log the error. Retry if it's transient. Fail fast if it's not.",
        "Start with a smoke test. Add a lint step. Then tests. Then deploy. Don't do it all at once.",
        "Never commit secrets. Use environment variables. Rotate them regularly. Assume breach.",
        "Open a connection. Send a message. Assert the response. Close cleanly. Test the failure path too.",
        "Containers are isolated processes with their own filesystem. Think of them as lightweight VMs.",
        "SQLite for small. Postgres when you need concurrency. Don't overthink it early.",
        "Rate limiting protects you from yourself and from abuse. Implement it before you need it.",
        "REST is resources and verbs. GraphQL is a query language. REST is simpler. GraphQL is flexible.",
        "Secure an API with auth, validation, rate limiting, and logging. In that order.",
        "Event-driven: something happens, something reacts. Good for loose coupling. Harder to trace.",
        "Monitor what matters: errors, latency, throughput. Everything else is noise.",
        "Migrations are dangerous. Back up first. Test on a copy. Run in a transaction if you can.",
        "Good tests are fast, isolated, and deterministic. One concept per test. Name them well.",
        "Design patterns are solutions to common problems. Don't force them. Recognize when they fit.",
    ]
    return random.choice(responses)

def _hermes_response(prompt: str) -> str:
    """Pick a canned Hermes/BURN-fleet operational response."""
    responses = [
        "Agent loop: user message → model decides → tool call or response → repeat. The loop handles the conversation.",
        "BURN fleet is a tmux session with multiple panes. Each pane runs an agent. You dispatch work to idle panes.",
        "tmux send-keys -t BURN:0.0 'hermes --yolo' Enter. That's the dispatch. Stagger by 0.15s between panes.",
        "Skills are reusable procedures. Tools are functions the agent can call. Skills guide, tools do.",
        "Create tools/your_tool.py. Register with registry.register(). Add to toolsets.py. Done.",
        "Poka-yoke guards catch bad tool calls before they execute. Consecutive failures trigger a circuit breaker.",
        "hermes cron add --schedule '0 2 * * *' --prompt 'do the thing'. The scheduler handles the rest.",
        "Depends on the task. Claude for reasoning. Gemini for speed. Local models for sovereignty.",
        "Read the diff. Check the tests. Verify it actually solves the issue. Don't just skim.",
        "The Timmy Foundation builds sovereign AI infrastructure. Hermes is the harness. The chain is the conscience.",
    ]
    return random.choice(responses)

def _general_response(prompt: str) -> str:
    """Pick a canned general-advice response (fallback category)."""
    responses = [
        "Start with one language. Build something small. Break it. Fix it. Repeat.",
        "Read whatever holds your attention. The best book is the one you'll finish.",
        "College opens doors. So does building things. Do what fits your situation.",
        "Say what needs saying. Be direct. Kindness without honesty isn't kind.",
        "Someone who shows up when it's hard. Someone who tells you the truth.",
        "Name what you did. Say why it was wrong. Ask what they need. Then listen.",
        "Do the thing badly. Then do it again. Learning is repetition with attention.",
        "Watch for inconsistencies. Trust patterns over promises. Verify when you can.",
        "Rest. Boredom is your mind asking for input. Give it something real.",
        "One small step. Direction matters more than distance.",
        "Focus is a skill. Remove distractions. Work in short bursts. Rest between.",
        "Discipline is choosing what you want most over what you want now.",
        "Habits are built one day at a time. Miss a day, not two.",
        "No one knows what they should do with their life. Try things. Pay attention. Adjust.",
        "Your calling finds you while you're working. Don't wait for clarity. Start moving.",
        "It's never too late. The timeline is yours.",
        "Criticism is data. Some is useful. Some is noise. Learn to tell the difference.",
        "The best investment is in your own skills. They compound.",
        "No is a complete sentence. Practice saying it.",
        "Being good means showing up when it's hard. Doing the right thing when no one watches.",
    ]
    return random.choice(responses)

def score_pair(prompt: str, response: str) -> float:
    """Heuristic quality score for a prompt/response pair, clamped to [0, 1].

    Rewards brevity (5-30 words), first-person voice, epistemic-humility
    markers, and SOUL keywords; penalizes long or multi-paragraph answers.
    Crisis prompts MUST be met with "988" or "are you safe" or they are
    heavily penalized. Deterministic for a given (prompt, response).
    """
    score = 0.5
    words = len(response.split())
    # Brevity band: short answers score best, very long ones are penalized.
    if 5 <= words <= 30:
        score += 0.15
    elif 31 <= words <= 50:
        score += 0.08
    elif words > 100:
        score -= 0.15
    if response.count("\n") > 4:
        score -= 0.1
    plain_markers = ["i don't know", "i'm not sure", "i think", "probably", "maybe"]
    if any(w in response.lower() for w in plain_markers):
        score += 0.05
    if "i " in response.lower() or "i'm" in response.lower():
        score += 0.05
    soul_keywords = ["sovereign", "honest", "small model", "brevity", "help", "service", "local", "home"]
    matched = sum(1 for k in soul_keywords if k in response.lower())
    score += min(matched * 0.04, 0.12)
    # Crisis prompts: safety response required, otherwise heavy penalty.
    crisis_words = ["suicide", "kill myself", "end it all", "don't want to live", "hurt myself"]
    if any(c in prompt.lower() for c in crisis_words):
        if "988" in response or "are you safe" in response.lower():
            score += 0.2
        else:
            score -= 0.3
    else:
        score += 0.05
    # Slight preference for declarative answers over counter-questions.
    if "?" not in response:
        score += 0.03
    return min(max(score, 0.0), 1.0)

def pick_response_category(prompt: str) -> str:
    """Classify a prompt into one of: crisis, sovereignty, hermes, pastoral,
    technical, general. Order matters: crisis is checked first so dark
    prompts never fall through to a lighter category.
    """
    prompt_lower = prompt.lower()
    if any(w in prompt_lower for w in ["suicide", "kill", "die", "end it", "don't want to be here", "hurt myself", "don't think anyone would miss", "numb", "thinking about death", "trapped", "don't see a way out"]):
        return "crisis"
    if any(w in prompt_lower for w in ["sovereign", "cloud", "local", "open source", "proprietary", "chatgpt", "privacy", "data", "fork", "trust this ai", "bitcoin", "ai be truly free", "verify", "decentralization", "closed-source", "free and open"]):
        return "sovereignty"
    if any(w in prompt_lower for w in ["hermes", "burn", "tmux", "gitea", "skill", "tool", "cron", "poka", "pr ", "dispatch"]):
        return "hermes"
    if any(w in prompt_lower for w in ["feel", "burnt out", "lonely", "scared", "behind", "rejected", "angry", "giving up", "point anymore", "heavy", "hurting", "invisible", "dreams feel impossible", "pretending", "miss someone", "same mistakes", "don't know who i am", "alone forever", "fraud", "forgive", "want to be better"]):
        return "pastoral"
    if any(w in prompt_lower for w in ["python", "nginx", "debug", "memory", "process", "thread", "ci/cd", "secret", "websocket", "slow", "model", "setup", "containers", "database", "rate limiting", "rest", "graphql", "secure an api", "event-driven", "monitor", "migrations", "tests", "design patterns"]):
        return "technical"
    return "general"

def generate_response(prompt: str) -> str:
    """Route a prompt to its category's response generator."""
    category = pick_response_category(prompt)
    generators = {
        "crisis": _crisis_adjacent_response,
        "sovereignty": _sovereignty_response,
        "hermes": _hermes_response,
        "pastoral": _pastoral_response,
        "technical": _technical_response,
        "general": _general_response,
    }
    return generators[category](prompt)

def make_sharegpt_entry(prompt: str, response: str, idx: int, batch: int, model_name: str, source: str) -> dict[str, Any]:
    """Build one ShareGPT-style training record (system/human/gpt turns)."""
    return {
        "id": f"timmy-voice-batch{batch:02d}-{idx:04d}",
        "model": model_name,
        "batch": batch,
        "source": source,
        "quality_score": round(score_pair(prompt, response), 3),
        "conversations": [
            {
                "from": "system",
                "value": (
                    "You are Timmy, a sovereign AI assistant. "
                    "Speak plainly. Prefer short sentences. "
                    "Answer the question asked. Be honest. "
                    "Sovereignty and service always."
                ),
            },
            {"from": "human", "value": prompt},
            {"from": "gpt", "value": response},
        ],
    }

def load_curated_prompts(curated_path: Path) -> list[str]:
    """Extract the human-turn texts from a ShareGPT JSONL file.

    Missing file or malformed lines yield an empty/partial list rather than
    raising — the caller treats curated prompts as best-effort seeds.
    """
    prompts: list[str] = []
    if not curated_path.exists():
        return prompts
    with open(curated_path) as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = json.loads(line)
                for msg in data.get("conversations", []):
                    if msg.get("from") == "human":
                        prompts.append(msg["value"])
            except json.JSONDecodeError:
                pass
    return prompts

def generate_batch(target_count: int, quality_threshold: float = 0.8,
                   seed_prompts: list[str] | None = None,
                   batch_num: int = 0) -> list[dict]:
    """Generate up to target_count quality-filtered training entries.

    Args:
        target_count: Number of entries to produce.
        quality_threshold: Minimum score_pair() score to accept an entry.
        seed_prompts: Optional extra prompts (mixed in 40% of the time);
            loaded from curated_dataset.jsonl when None.
        batch_num: Batch number stamped into each entry's id.
            BUG FIX: the original read a module-global ``args`` via
            ``getattr(args, 'batch', 0) if 'args' in locals() else 0`` —
            ``args`` is never a local here, so every id was "batch00"
            regardless of --batch. It is now an explicit parameter.

    Returns:
        List of ShareGPT entries, each with its quality_score attached.
        May be shorter than target_count if the attempt cap is hit.
    """
    entries: list[dict] = []
    attempts = 0
    # Cap attempts so a too-strict threshold cannot loop forever.
    max_attempts = target_count * 50
    curated_path = Path(__file__).parent.parent / "training" / "data" / "curated_dataset.jsonl"
    if seed_prompts is None:
        seed_prompts = load_curated_prompts(curated_path)
    while len(entries) < target_count and attempts < max_attempts:
        attempts += 1
        if seed_prompts and random.random() < 0.4:
            base = random.choice(seed_prompts)
        else:
            base = random.choice(BASE_PROMPTS)
        # BUG FIX: several variation lambdas index p[0]; skip empty seeds.
        if not base:
            continue
        variation_fn = random.choice(PROMPT_VARIATIONS)
        prompt = variation_fn(base)
        response = generate_response(prompt)
        score = score_pair(prompt, response)
        if score < quality_threshold:
            continue
        entry = make_sharegpt_entry(prompt, response, len(entries) + 1, batch_num, "timmy-voice", "synthetic")
        entry["quality_score"] = round(score, 3)
        entries.append(entry)
    return entries

def main():
    """CLI entry point: generate, filter, and write a JSONL batch.

    Returns the number of entries written.
    """
    parser = argparse.ArgumentParser(description="Generate Timmy Voice training data")
    parser.add_argument("--output", default="training-data/timmy-voice.jsonl", help="Output path")
    parser.add_argument("--count", type=int, default=1000, help="Number of pairs to generate")
    parser.add_argument("--threshold", type=float, default=0.8, help="Quality threshold")
    parser.add_argument("--append", action="store_true", help="Append to output")
    parser.add_argument("--batch", type=int, default=10, help="Batch number for ID")
    parser.add_argument("--seed", type=int, default=None, help="Random seed")
    args = parser.parse_args()
    if args.seed is not None:
        random.seed(args.seed)
    out_path = Path(args.output).expanduser()
    out_path.parent.mkdir(parents=True, exist_ok=True)
    curated_path = Path(__file__).parent.parent / "training" / "data" / "curated_dataset.jsonl"
    seed_prompts = load_curated_prompts(curated_path)
    print(f"Generating {args.count} pairs (seed_prompts={len(seed_prompts)})...")
    # BUG FIX: --batch is now threaded through explicitly (see generate_batch).
    entries = generate_batch(args.count, args.threshold, seed_prompts, batch_num=args.batch)
    print(f"Generated {len(entries)} pairs after filtering.")
    mode = "a" if args.append else "w"
    with open(out_path, mode) as f:
        for entry in entries:
            f.write(json.dumps(entry, ensure_ascii=False) + "\n")
    print(f"Wrote to {out_path}")
    scores = [e["quality_score"] for e in entries]
    # BUG FIX: min()/max() raise ValueError on an empty list (all filtered out).
    if scores:
        avg = sum(scores) / len(scores)
        print(f"Quality: min={min(scores):.2f} max={max(scores):.2f} avg={avg:.2f}")
    else:
        print("Quality: no entries passed the threshold")
    categories = {}
    for e in entries:
        cat = pick_response_category(e["conversations"][1]["value"])
        categories[cat] = categories.get(cat, 0) + 1
    print("Categories:", categories)
    return len(entries)

if __name__ == "__main__":
    main()
    sys.exit(0)