diff --git a/.env.example b/.env.example
index 98c5ea19..85ecf09d 100644
--- a/.env.example
+++ b/.env.example
@@ -165,3 +165,21 @@ IMAGE_TOOLS_DEBUG=false
 # CONTEXT_COMPRESSION_ENABLED=true        # Enable auto-compression (default: true)
 # CONTEXT_COMPRESSION_THRESHOLD=0.85      # Compress at 85% of context limit
 # CONTEXT_COMPRESSION_MODEL=google/gemini-2.0-flash-001  # Fast model for summaries
+
+# =============================================================================
+# RL TRAINING (Tinker + Atropos)
+# =============================================================================
+# Run reinforcement learning training on language models using the Tinker API.
+# Requires the rl-server (provided by the tinker-atropos package) to be running.
+
+# Tinker API Key - RL training service
+# Get at: https://tinker-console.thinkingmachines.ai/keys
+TINKER_API_KEY=
+
+# Weights & Biases API Key - Experiment tracking and metrics
+# Get at: https://wandb.ai/authorize
+WANDB_API_KEY=
+
+# RL API Server URL (default: http://localhost:8080)
+# Change if running the rl-server on a different host/port
+# RL_API_URL=http://localhost:8080
diff --git a/README.md b/README.md
index 8a999cb1..f49ae26a 100644
--- a/README.md
+++ b/README.md
@@ -74,6 +74,7 @@ You need at least one LLM provider:
 | Web scraping | [Firecrawl](https://firecrawl.dev/) | `FIRECRAWL_API_KEY` |
 | Browser automation | [Browserbase](https://browserbase.com/) | `BROWSERBASE_API_KEY`, `BROWSERBASE_PROJECT_ID` |
 | Image generation | [FAL](https://fal.ai/) | `FAL_KEY` |
+| RL Training | [Tinker](https://tinker-console.thinkingmachines.ai/) + [WandB](https://wandb.ai/) | `TINKER_API_KEY`, `WANDB_API_KEY` |
 | Messaging | Telegram, Discord | `TELEGRAM_BOT_TOKEN`, `DISCORD_BOT_TOKEN` |
 
 ---
@@ -270,6 +271,61 @@ When enabled, you'll see messages like:
 
 See [docs/messaging.md](docs/messaging.md) for WhatsApp and advanced setup.
 
+### 🤖 RL Training (Tinker + Atropos)
+
+Train language models with reinforcement learning using the Tinker API and Atropos framework.
+
+#### Requirements
+
+1. **API Keys:** Add to `~/.hermes/.env`:
+```bash
+TINKER_API_KEY=your-tinker-key      # Get from https://tinker-console.thinkingmachines.ai/keys
+WANDB_API_KEY=your-wandb-key        # Get from https://wandb.ai/authorize
+```
+
+2. **Install tinker-atropos** (from your local checkout, kept in a separate directory):
+```bash
+cd ~/tinker-atropos
+pip install -e .
+```
+
+3. **Start the RL API server:**
+```bash
+rl-server    # Runs on port 8080 by default
+```
+
+#### Using RL Tools
+
+Once the RL API server is running, the agent can use the RL training tools:
+
+```
+You: Start training on GSM8k with group_size=16
+
+Agent: I'll set up an RL training run on the GSM8k environment...
+[Uses rl_list_environments, rl_select_environment, rl_edit_config, rl_start_training]
+```
+
+#### Available RL Tools
+
+| Tool | Description |
+|------|-------------|
+| `rl_list_environments` | List available RL environments |
+| `rl_select_environment` | Select an environment for training |
+| `rl_get_current_config` | View the current config (configurable and locked fields) |
+| `rl_edit_config` | Change a configuration value |
+| `rl_start_training` | Start a training run |
+| `rl_check_status` | Check training progress |
+| `rl_stop_training` | Stop a running training run |
+| `rl_get_results` | Fetch WandB metrics |
+
+#### Dedicated RL CLI
+
+For extended RL workflows with longer timeouts:
+
+```bash
+python rl_cli.py --model "anthropic/claude-sonnet-4-20250514"
+```
+
 ### ⏰ Scheduled Tasks (Cron)
 
 Schedule tasks to run automatically:
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index a0d98b6a..82ce6ae7 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -151,6 +151,20 @@ OPTIONAL_ENV_VARS = {
         "tools": ["image_generate"],
         "password": True,
     },
+    "TINKER_API_KEY": {
+        "description": "Tinker API key for RL training",
+        "prompt": "Tinker API key",
+        "url": "https://tinker-console.thinkingmachines.ai/keys",
+        "tools": ["rl_start_training", "rl_check_status", "rl_stop_training"],
+        "password": True,
+    },
+    "WANDB_API_KEY": {
+        "description": "Weights & Biases API key for experiment tracking",
+        "prompt": "WandB API key",
+        "url": "https://wandb.ai/authorize",
+        "tools": ["rl_get_results", "rl_check_status"],
+        "password": True,
+    },
     "OPENAI_BASE_URL": {
         "description": "Custom OpenAI-compatible API endpoint URL",
         "prompt": "API base URL (e.g., https://api.example.com/v1)",
diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py
index 06668d4e..83f42730 100644
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@@ -186,6 +186,14 @@ def _print_setup_summary(config: dict, hermes_home):
     else:
         tool_status.append(("Image Generation", False, "FAL_KEY"))
     
+    # Tinker + WandB (RL training)
+    if get_env_value('TINKER_API_KEY') and get_env_value('WANDB_API_KEY'):
+        tool_status.append(("RL Training (Tinker)", True, None))
+    elif get_env_value('TINKER_API_KEY'):
+        tool_status.append(("RL Training (Tinker)", False, "WANDB_API_KEY"))
+    else:
+        tool_status.append(("RL Training (Tinker)", False, "TINKER_API_KEY"))
+    
     # Terminal (always available if system deps met)
     tool_status.append(("Terminal/Commands", True, None))
     
@@ -932,6 +940,47 @@ def run_setup_wizard(args):
             if api_key:
                 save_env_value("FAL_KEY", api_key)
                 print_success("    Configured ✓")
+    print()
+    
+    # Tinker + WandB - RL Training
+    print_info("─" * 50)
+    print(color("  RL Training (Tinker + WandB)", Colors.CYAN))
+    print_info("  Enables: rl_start_training, rl_check_status, rl_get_results tools")
+    print_info("  Use case: Run reinforcement learning training via Tinker API")
+    tinker_configured = get_env_value('TINKER_API_KEY')
+    wandb_configured = get_env_value('WANDB_API_KEY')
+    
+    if tinker_configured and wandb_configured:
+        print_success("  Status: Configured ✓")
+        if prompt_yes_no("  Update RL training credentials?", False):
+            api_key = prompt("    Tinker API key", password=True)
+            if api_key:
+                save_env_value("TINKER_API_KEY", api_key)
+            wandb_key = prompt("    WandB API key", password=True)
+            if wandb_key:
+                save_env_value("WANDB_API_KEY", wandb_key)
+            print_success("    Updated")
+    else:
+        if tinker_configured:
+            print_warning("  Status: Tinker configured, WandB missing")
+        elif wandb_configured:
+            print_warning("  Status: WandB configured, Tinker missing")
+        else:
+            print_warning("  Status: Not configured (tools will be disabled)")
+        
+        if prompt_yes_no("  Set up RL Training?", False):
+            print_info("    Get Tinker key at: https://tinker-console.thinkingmachines.ai/keys")
+            print_info("    Get WandB key at: https://wandb.ai/authorize")
+            api_key = prompt("    Tinker API key", password=True)
+            if api_key:
+                save_env_value("TINKER_API_KEY", api_key)
+            wandb_key = prompt("    WandB API key", password=True)
+            if wandb_key:
+                save_env_value("WANDB_API_KEY", wandb_key)
+            if api_key and wandb_key:
+                print_success("    Configured ✓")
+            else:
+                print_warning("    Partially configured (both keys required)")
     
     # =========================================================================
     # Save config and show summary
diff --git a/hermes_cli/status.py b/hermes_cli/status.py
index 2d24bb50..bbbdc2af 100644
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@@ -74,6 +74,8 @@ def show_status(args):
         "Firecrawl": "FIRECRAWL_API_KEY",
         "Browserbase": "BROWSERBASE_API_KEY",
         "FAL": "FAL_KEY",
+        "Tinker": "TINKER_API_KEY",
+        "WandB": "WANDB_API_KEY",
     }
     
     for name, env_var in keys.items():
diff --git a/model_tools.py b/model_tools.py
index ebabaf56..d84c3296 100644
--- a/model_tools.py
+++ b/model_tools.py
@@ -554,13 +554,13 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "rl_edit_config",
-                "description": "Update a configuration field. Valid fields: group_size (int), max_token_length (int), total_steps (int), steps_per_eval (int), use_wandb (bool), wandb_name (str), max_num_workers (int).",
+                "description": "Update a configuration field. Use rl_get_current_config() first to see all available fields for the selected environment. Each environment has different configurable options. Infrastructure settings (tokenizer, URLs, lora_rank, learning_rate) are locked.",
                 "parameters": {
                     "type": "object",
                     "properties": {
                         "field": {
                             "type": "string",
-                            "description": "Name of the field to update"
+                            "description": "Name of the field to update (get available fields from rl_get_current_config)"
                         },
                         "value": {
                             "description": "New value for the field"
@@ -574,26 +574,10 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
             "type": "function",
             "function": {
                 "name": "rl_start_training",
-                "description": "Start a new RL training run. WARNING: Training can take hours. Use rl_check_status() to monitor (30-minute intervals recommended). Test with rl_test_inference() first!",
+                "description": "Start a new RL training run with the current environment and config. Most training parameters (lora_rank, learning_rate, etc.) are fixed. Use rl_edit_config() to set group_size, batch_size, wandb_project before starting. WARNING: Training takes hours. Test with rl_test_inference() first!",
                 "parameters": {
                     "type": "object",
-                    "properties": {
-                        "wandb_project": {
-                            "type": "string",
-                            "description": "WandB project name for logging",
-                            "default": "rl-training"
-                        },
-                        "lora_rank": {
-                            "type": "integer",
-                            "description": "LoRA rank for training",
-                            "default": 32
-                        },
-                        "learning_rate": {
-                            "type": "number",
-                            "description": "Learning rate",
-                            "default": 4e-5
-                        }
-                    },
+                    "properties": {},
                     "required": []
                 }
             }
@@ -1324,13 +1308,7 @@ def handle_rl_function_call(
         )
     
     elif function_name == "rl_start_training":
-        return loop.run_until_complete(
-            rl_start_training(
-                wandb_project=function_args.get("wandb_project", "rl-training"),
-                lora_rank=function_args.get("lora_rank", 32),
-                learning_rate=function_args.get("learning_rate", 4e-5)
-            )
-        )
+        return loop.run_until_complete(rl_start_training())
     
     elif function_name == "rl_check_status":
         return loop.run_until_complete(
diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py
index 1b7401c1..7c40bc72 100644
--- a/tools/rl_training_tool.py
+++ b/tools/rl_training_tool.py
@@ -168,20 +168,22 @@ async def rl_get_current_config() -> str:
     """
     Get the current environment configuration.
     
-    Returns only the fields that are safe to modify. Other fields
-    (tokenizer_name, rollout_server_url, etc.) are fixed by the system.
+    Returns all configurable fields for the selected environment.
+    Each environment may have different configuration options.
     
-    Available fields:
-    - group_size: Rollouts per prompt (4-16 typical)
-    - max_token_length: Max generation tokens (2048-16384)
-    - total_steps: Training steps (50-2000)
-    - steps_per_eval: Steps between evaluations
-    - use_wandb: Enable WandB logging
+    Fields are divided into:
+    - configurable_fields: Can be changed with rl_edit_config()
+    - locked_fields: Infrastructure settings that cannot be changed
+    
+    Common configurable fields include:
+    - group_size: Rollouts per prompt
+    - batch_size: Training batch size
     - wandb_name: WandB run name prefix
-    - max_num_workers: Concurrent workers (-1 = auto)
+    - system_prompt: Model instructions
+    - And any environment-specific options
     
     Returns:
-        JSON string with current config fields and their values
+        JSON string with configurable and locked fields
     """
     result = await _make_request("GET", "/config")
     return json.dumps(result, indent=2)
@@ -191,21 +193,15 @@ async def rl_edit_config(field: str, value: Any) -> str:
     """
     Update a configuration field.
     
-    Only exposed fields can be modified. Validates field name and type.
+    Use rl_get_current_config() first to see available fields for the
+    selected environment. Each environment has different options.
+    
+    Locked fields (infrastructure settings) cannot be changed.
     
     Args:
-        field: Name of the field to update (e.g., "group_size", "total_steps")
+        field: Name of the field to update (from rl_get_current_config)
         value: New value for the field
     
-    Valid fields:
-    - group_size (int): Rollouts per prompt
-    - max_token_length (int): Max generation tokens
-    - total_steps (int): Training steps
-    - steps_per_eval (int): Eval frequency
-    - use_wandb (bool): Enable logging
-    - wandb_name (str): Run name prefix
-    - max_num_workers (int): Workers count
-    
     Returns:
         JSON string with updated config or error message
     """
@@ -217,37 +213,28 @@ async def rl_edit_config(field: str, value: Any) -> str:
 # Training Management Tools
 # ============================================================================
 
-async def rl_start_training(
-    wandb_project: str = "rl-training",
-    lora_rank: int = 32,
-    learning_rate: float = 4e-5,
-) -> str:
+async def rl_start_training() -> str:
     """
     Start a new RL training run with the current environment and config.
     
     Requires an environment to be selected first using rl_select_environment().
+    Use rl_edit_config() to set group_size, batch_size, wandb_project before starting.
     
-    WARNING: Training runs can take hours to days. Use rl_check_status() to
-    monitor progress (recommended: check every 30 minutes at most).
+    Most training parameters are fixed (lora_rank=32, learning_rate=4e-5, etc.)
+    and cannot be changed.
     
-    Args:
-        wandb_project: WandB project name for logging
-        lora_rank: LoRA rank for training (default: 32)
-        learning_rate: Learning rate (default: 4e-5)
+    WARNING: Training runs take hours. Use rl_check_status() to monitor
+    progress (recommended: check no more often than once every 30 minutes).
     
     Returns:
         JSON string with run_id and initial status
     
     TIP: Before starting training:
     1. Test with rl_test_inference() to verify the environment works
-    2. Start with fewer total_steps to validate the setup
+    2. Configure group_size and batch_size appropriately
     3. Monitor WandB metrics for reward/mean and percent_correct
     """
-    result = await _make_request("POST", "/runs", {
-        "wandb_project": wandb_project,
-        "lora_rank": lora_rank,
-        "learning_rate": learning_rate,
-    })
+    result = await _make_request("POST", "/runs", {})
     return json.dumps(result, indent=2)