Update RL tools and enhance configuration management
- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`. - Enhanced `README.md` with installation instructions for submodules and updated API key usage. - Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools. - Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation. - Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.
This commit is contained in:
23
README.md
23
README.md
@@ -15,7 +15,7 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
|
||||
```
|
||||
|
||||
The installer will:
|
||||
- Clone to `~/.hermes-agent`
|
||||
- Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
|
||||
- Create a virtual environment
|
||||
- Install all dependencies
|
||||
- Run the interactive setup wizard
|
||||
@@ -281,18 +281,10 @@ Train language models with reinforcement learning using the Tinker API and Atrop
|
||||
```bash
|
||||
TINKER_API_KEY=your-tinker-key # Get from https://tinker-console.thinkingmachines.ai/keys
|
||||
WANDB_API_KEY=your-wandb-key # Get from https://wandb.ai/authorize
|
||||
OPENROUTER_API_KEY=your-key # Optional: for rl_test_inference
|
||||
```
|
||||
|
||||
2. **Install tinker-atropos:** (in a separate directory)
|
||||
```bash
|
||||
cd ~/tinker-atropos
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
3. **Start the RL API server:**
|
||||
```bash
|
||||
rl-server # Runs on port 8080 by default
|
||||
```
|
||||
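2. **That's it!** tinker-atropos is included as a git submodule and the installer sets it up for you - no separate installation needed. (If you install manually, run the `pip install -e "./tinker-atropos"` step from the manual installation instructions.)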
|
||||
|
||||
#### Using RL Tools
|
||||
|
||||
@@ -313,10 +305,12 @@ Agent: I'll set up an RL training run on the GSM8k environment...
|
||||
| `rl_select_environment` | Select an environment for training |
|
||||
| `rl_get_current_config` | View all configurable options |
|
||||
| `rl_edit_config` | Change a configuration value |
|
||||
| `rl_test_inference` | Test environment with OpenRouter (pre-training validation) |
|
||||
| `rl_start_training` | Start a training run |
|
||||
| `rl_check_status` | Check training progress |
|
||||
| `rl_stop_training` | Stop a running training |
|
||||
| `rl_get_results` | Fetch WandB metrics |
|
||||
| `rl_list_runs` | List active training runs |
|
||||
|
||||
#### Dedicated RL CLI
|
||||
|
||||
@@ -434,7 +428,7 @@ skills/
|
||||
If you prefer not to use the installer:
|
||||
|
||||
```bash
|
||||
# Clone the repository
|
||||
# Clone the repository (with submodules)
|
||||
git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
|
||||
cd hermes-agent
|
||||
|
||||
@@ -445,6 +439,11 @@ cd hermes-agent
|
||||
python3 -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install -e ".[all]"
|
||||
|
||||
# Install submodules (required for terminal and RL tools)
|
||||
pip install -e "./mini-swe-agent" # Terminal tool backend
|
||||
pip install -e "./tinker-atropos" # RL training backend
|
||||
|
||||
hermes setup
|
||||
```
|
||||
|
||||
|
||||
@@ -665,7 +665,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
|
||||
"models": {
|
||||
"type": "array",
|
||||
"items": {"type": "string"},
|
||||
"description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1"
|
||||
"description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
@@ -730,7 +730,7 @@ def get_all_tool_names() -> List[str]:
|
||||
"rl_get_current_config", "rl_edit_config",
|
||||
"rl_start_training", "rl_check_status",
|
||||
"rl_stop_training", "rl_get_results",
|
||||
"rl_list_runs"
|
||||
"rl_list_runs", "rl_test_inference"
|
||||
])
|
||||
|
||||
return tool_names
|
||||
@@ -898,7 +898,7 @@ def get_tool_definitions(
|
||||
"rl_get_current_config", "rl_edit_config",
|
||||
"rl_start_training", "rl_check_status",
|
||||
"rl_stop_training", "rl_get_results",
|
||||
"rl_list_runs"
|
||||
"rl_list_runs", "rl_test_inference"
|
||||
]
|
||||
}
|
||||
legacy_tools = legacy_map.get(toolset_name, [])
|
||||
@@ -950,7 +950,7 @@ def get_tool_definitions(
|
||||
"rl_get_current_config", "rl_edit_config",
|
||||
"rl_start_training", "rl_check_status",
|
||||
"rl_stop_training", "rl_get_results",
|
||||
"rl_list_runs"
|
||||
"rl_list_runs", "rl_test_inference"
|
||||
]
|
||||
}
|
||||
legacy_tools = legacy_map.get(toolset_name, [])
|
||||
@@ -1407,7 +1407,7 @@ def handle_function_call(
|
||||
"rl_get_current_config", "rl_edit_config",
|
||||
"rl_start_training", "rl_check_status",
|
||||
"rl_stop_training", "rl_get_results",
|
||||
"rl_list_runs"
|
||||
"rl_list_runs", "rl_test_inference"
|
||||
]:
|
||||
return handle_rl_function_call(function_name, function_args)
|
||||
|
||||
|
||||
91
rl_cli.py
91
rl_cli.py
@@ -25,14 +25,34 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
import fire
|
||||
import yaml
|
||||
|
||||
# Load environment variables from .env file
|
||||
from dotenv import load_dotenv
|
||||
|
||||
env_path = Path(__file__).parent / '.env'
|
||||
if env_path.exists():
|
||||
load_dotenv(dotenv_path=env_path)
|
||||
print(f"✅ Loaded environment variables from {env_path}")
|
||||
# Load from ~/.hermes/.env if it exists; otherwise fall back to the local .env (only one file is loaded)
|
||||
hermes_env_path = Path.home() / '.hermes' / '.env'
|
||||
local_env_path = Path(__file__).parent / '.env'
|
||||
|
||||
if hermes_env_path.exists():
|
||||
load_dotenv(dotenv_path=hermes_env_path)
|
||||
print(f"✅ Loaded environment variables from {hermes_env_path}")
|
||||
elif local_env_path.exists():
|
||||
load_dotenv(dotenv_path=local_env_path)
|
||||
print(f"✅ Loaded environment variables from {local_env_path}")
|
||||
|
||||
# Choose the working directory for terminal commands: the tinker-atropos
# submodule when it is checked out next to this file, otherwise the directory
# containing this file (the hermes-agent checkout).
tinker_atropos_dir = Path(__file__).parent / 'tinker-atropos'
_submodule_present = tinker_atropos_dir.exists()
_terminal_cwd = tinker_atropos_dir if _submodule_present else Path(__file__).parent

# Both variables are exported regardless of which directory was chosen.
os.environ['TERMINAL_CWD'] = str(_terminal_cwd)
os.environ['HERMES_QUIET'] = '1'  # Disable temp subdirectory creation

if _submodule_present:
    print(f"📂 Terminal working directory: {tinker_atropos_dir}")
else:
    # Fall back to hermes-agent directory if submodule not found
    print(f"⚠️ tinker-atropos submodule not found, using: {Path(__file__).parent}")
|
||||
|
||||
# Import agent and tools
|
||||
from run_agent import AIAgent
|
||||
@@ -40,6 +60,50 @@ from model_tools import get_tool_definitions, check_toolset_requirements
|
||||
from tools.rl_training_tool import check_rl_api_keys, get_missing_keys
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Config Loading
|
||||
# ============================================================================
|
||||
|
||||
DEFAULT_MODEL = "anthropic/claude-opus-4.5"
DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"


def load_hermes_config() -> dict:
    """
    Load configuration from ~/.hermes/config.yaml.

    The built-in defaults (DEFAULT_MODEL, DEFAULT_BASE_URL) are used for any
    value the file does not provide. A missing file, an unreadable or
    malformed file, and null/blank/non-string values all leave the
    corresponding default in place, so callers always receive usable strings.

    The "model" entry may be either a plain string or a mapping with a
    "default" key.

    Returns:
        dict: Configuration with "model" and "base_url" keys.
    """
    config_path = Path.home() / '.hermes' / 'config.yaml'

    config = {
        "model": DEFAULT_MODEL,
        "base_url": DEFAULT_BASE_URL,
    }

    if not config_path.exists():
        return config

    try:
        with open(config_path, "r", encoding="utf-8") as f:
            file_config = yaml.safe_load(f) or {}
    except Exception as e:
        # Best-effort: a broken config file must not prevent the CLI from
        # starting, so warn and continue with the defaults.
        print(f"⚠️ Warning: Failed to load config.yaml: {e}")
        return config

    if not isinstance(file_config, dict):
        # e.g. the file holds a bare scalar or a list instead of key/value pairs
        print(
            "⚠️ Warning: Failed to load config.yaml: expected a mapping at the "
            f"top level, got {type(file_config).__name__}"
        )
        return config

    # "model" is either a string or a mapping carrying a "default" entry
    model = file_config.get("model")
    if isinstance(model, dict):
        model = model.get("default")
    if isinstance(model, str) and model.strip():
        config["model"] = model

    # Only override base_url with a real value; `base_url:` with nothing
    # after it parses as None and must not replace the default.
    base_url = file_config.get("base_url")
    if isinstance(base_url, str) and base_url.strip():
        config["base_url"] = base_url

    return config
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# RL-Specific Configuration
|
||||
# ============================================================================
|
||||
@@ -108,7 +172,7 @@ When asked to train a model, follow this workflow:
|
||||
"""
|
||||
|
||||
# Toolsets to enable for RL workflows
|
||||
RL_TOOLSETS = ["base", "terminal", "web", "rl"]
|
||||
RL_TOOLSETS = ["terminal", "web", "rl"]
|
||||
|
||||
|
||||
# ============================================================================
|
||||
@@ -172,9 +236,9 @@ def list_environments_sync():
|
||||
|
||||
def main(
|
||||
task: str = None,
|
||||
model: str = "anthropic/claude-sonnet-4-20250514",
|
||||
model: str = None,
|
||||
api_key: str = None,
|
||||
base_url: str = "https://openrouter.ai/api/v1",
|
||||
base_url: str = None,
|
||||
max_iterations: int = RL_MAX_ITERATIONS,
|
||||
interactive: bool = False,
|
||||
list_environments: bool = False,
|
||||
@@ -187,9 +251,9 @@ def main(
|
||||
|
||||
Args:
|
||||
task: The training task/goal (e.g., "Train a model on GSM8k for math")
|
||||
model: Model to use for the agent (default: claude-sonnet-4)
|
||||
model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided)
|
||||
api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided)
|
||||
base_url: API base URL (default: OpenRouter)
|
||||
base_url: API base URL (reads from config or defaults to OpenRouter)
|
||||
max_iterations: Maximum agent iterations (default: 200 for long workflows)
|
||||
interactive: Run in interactive mode (multiple conversations)
|
||||
list_environments: Just list available RL environments and exit
|
||||
@@ -210,6 +274,15 @@ def main(
|
||||
# Check server status
|
||||
python rl_cli.py --check-server
|
||||
"""
|
||||
# Load config from ~/.hermes/config.yaml
|
||||
config = load_hermes_config()
|
||||
|
||||
# Use config values if not explicitly provided
|
||||
if model is None:
|
||||
model = config["model"]
|
||||
if base_url is None:
|
||||
base_url = config["base_url"]
|
||||
|
||||
print("🎯 RL Training Agent")
|
||||
print("=" * 60)
|
||||
|
||||
|
||||
@@ -1764,10 +1764,16 @@ class AIAgent:
|
||||
self._invalid_tool_retries = 0
|
||||
|
||||
# Validate tool call arguments are valid JSON
|
||||
# Handle empty strings as empty objects (common model quirk)
|
||||
invalid_json_args = []
|
||||
for tc in assistant_message.tool_calls:
|
||||
args = tc.function.arguments
|
||||
# Treat empty/whitespace strings as empty object
|
||||
if not args or not args.strip():
|
||||
tc.function.arguments = "{}"
|
||||
continue
|
||||
try:
|
||||
json.loads(tc.function.arguments)
|
||||
json.loads(args)
|
||||
except json.JSONDecodeError as e:
|
||||
invalid_json_args.append((tc.function.name, str(e)))
|
||||
|
||||
|
||||
@@ -150,14 +150,15 @@ function Install-Repository {
|
||||
}
|
||||
} else {
|
||||
# Try SSH first (for private repo access), fall back to HTTPS
|
||||
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
|
||||
Write-Info "Trying SSH clone..."
|
||||
$sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1
|
||||
$sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1
|
||||
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Success "Cloned via SSH"
|
||||
} else {
|
||||
Write-Info "SSH failed, trying HTTPS..."
|
||||
$httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1
|
||||
$httpsResult = git clone --branch $Branch --recurse-submodules $RepoUrlHttps $InstallDir 2>&1
|
||||
|
||||
if ($LASTEXITCODE -eq 0) {
|
||||
Write-Success "Cloned via HTTPS"
|
||||
@@ -171,6 +172,13 @@ function Install-Repository {
|
||||
}
|
||||
}
|
||||
|
||||
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
|
||||
Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
|
||||
Push-Location $InstallDir
|
||||
git submodule update --init --recursive
|
||||
Pop-Location
|
||||
Write-Success "Submodules ready"
|
||||
|
||||
Write-Success "Repository ready"
|
||||
}
|
||||
|
||||
@@ -208,15 +216,43 @@ function Install-Dependencies {
|
||||
& .\venv\Scripts\Activate.ps1
|
||||
}
|
||||
|
||||
# Install main package
|
||||
try {
|
||||
pip install -e ".[all]" 2>&1 | Out-Null
|
||||
} catch {
|
||||
pip install -e "." | Out-Null
|
||||
}
|
||||
|
||||
Write-Success "Main package installed"
|
||||
|
||||
# Install submodules
# pip is a native command: a non-zero exit code does not raise a PowerShell
# exception by itself, so $LASTEXITCODE is checked explicitly and turned into
# a throw. That way the catch block (and its warning) runs on every failure
# and the success message is only printed after a successful install.
Write-Info "Installing mini-swe-agent (terminal tool backend)..."
if (Test-Path "mini-swe-agent\pyproject.toml") {
    try {
        pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "pip exited with code $LASTEXITCODE"
        }
        Write-Success "mini-swe-agent installed"
    } catch {
        Write-Warning "mini-swe-agent install failed (terminal tools may not work)"
    }
} else {
    Write-Warning "mini-swe-agent not found (run: git submodule update --init)"
}

Write-Info "Installing tinker-atropos (RL training backend)..."
if (Test-Path "tinker-atropos\pyproject.toml") {
    try {
        pip install -e ".\tinker-atropos" 2>&1 | Out-Null
        if ($LASTEXITCODE -ne 0) {
            throw "pip exited with code $LASTEXITCODE"
        }
        Write-Success "tinker-atropos installed"
    } catch {
        Write-Warning "tinker-atropos install failed (RL tools may not work)"
    }
} else {
    Write-Warning "tinker-atropos not found (run: git submodule update --init)"
}
|
||||
|
||||
Pop-Location
|
||||
|
||||
Write-Success "Dependencies installed"
|
||||
Write-Success "All dependencies installed"
|
||||
}
|
||||
|
||||
function Set-PathVariable {
|
||||
|
||||
@@ -292,12 +292,13 @@ clone_repo() {
|
||||
fi
|
||||
else
|
||||
# Try SSH first (for private repo access), fall back to HTTPS
|
||||
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
|
||||
log_info "Trying SSH clone..."
|
||||
if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
|
||||
if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
|
||||
log_success "Cloned via SSH"
|
||||
else
|
||||
log_info "SSH failed, trying HTTPS..."
|
||||
if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
|
||||
if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
|
||||
log_success "Cloned via HTTPS"
|
||||
else
|
||||
log_error "Failed to clone repository"
|
||||
@@ -310,6 +311,12 @@ clone_repo() {
|
||||
fi
|
||||
|
||||
cd "$INSTALL_DIR"
|
||||
|
||||
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
|
||||
log_info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
|
||||
git submodule update --init --recursive
|
||||
log_success "Submodules ready"
|
||||
|
||||
log_success "Repository ready"
|
||||
}
|
||||
|
||||
@@ -343,10 +350,29 @@ install_deps() {
|
||||
source venv/bin/activate
|
||||
fi
|
||||
|
||||
# Install the package in editable mode with all extras
|
||||
# Install the main package in editable mode with all extras
|
||||
pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
|
||||
|
||||
log_success "Dependencies installed"
|
||||
log_success "Main package installed"
|
||||
|
||||
# Install submodules
# Each install reports exactly one outcome: success only when pip exits 0,
# a warning otherwise. Failures are non-fatal because the submodules back
# optional tools (terminal and RL).
log_info "Installing mini-swe-agent (terminal tool backend)..."
if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
    if pip install -e "./mini-swe-agent" > /dev/null 2>&1; then
        log_success "mini-swe-agent installed"
    else
        log_warn "mini-swe-agent install failed (terminal tools may not work)"
    fi
else
    log_warn "mini-swe-agent not found (run: git submodule update --init)"
fi

log_info "Installing tinker-atropos (RL training backend)..."
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
    if pip install -e "./tinker-atropos" > /dev/null 2>&1; then
        log_success "tinker-atropos installed"
    else
        log_warn "tinker-atropos install failed (RL tools may not work)"
    fi
else
    log_warn "tinker-atropos not found (run: git submodule update --init)"
fi
|
||||
|
||||
log_success "All dependencies installed"
|
||||
}
|
||||
|
||||
setup_path() {
|
||||
|
||||
@@ -37,6 +37,7 @@ import subprocess
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
import yaml
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
@@ -84,6 +85,7 @@ LOCKED_FIELDS = {
|
||||
"weight": 1.0,
|
||||
"num_requests_for_eval": 256,
|
||||
"timeout": 3600,
|
||||
"server_type": "sglang", # Tinker uses sglang for actual training
|
||||
}
|
||||
],
|
||||
"tinker": {
|
||||
@@ -211,6 +213,9 @@ def _scan_environments() -> List[EnvironmentInfo]:
|
||||
def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
Dynamically import an environment and extract its config fields.
|
||||
|
||||
Uses config_init() to get the actual config class, with fallback to
|
||||
directly importing BaseEnvConfig if config_init fails.
|
||||
"""
|
||||
try:
|
||||
# Load the environment module
|
||||
@@ -230,15 +235,38 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
|
||||
if not env_class:
|
||||
return {}
|
||||
|
||||
# Call config_init to get the actual config
|
||||
# Try calling config_init to get the actual config class
|
||||
config_class = None
|
||||
try:
|
||||
env_config, server_configs = env_class.config_init()
|
||||
config_class = type(env_config)
|
||||
except Exception as config_error:
|
||||
# Fallback: try to import BaseEnvConfig directly from atroposlib
|
||||
print(f"Note: config_init failed ({config_error}), using BaseEnvConfig defaults")
|
||||
try:
|
||||
from atroposlib.envs.base import BaseEnvConfig
|
||||
config_class = BaseEnvConfig
|
||||
except ImportError:
|
||||
return {}
|
||||
|
||||
if not config_class:
|
||||
return {}
|
||||
|
||||
# Helper to make values JSON-serializable (handle enums, etc.)
|
||||
def make_serializable(val):
    """Return a JSON-friendly version of a config default.

    Enum members (anything exposing ``.value``) are replaced by their
    underlying value. Lists, tuples and dicts are converted element by
    element so that enum members nested inside a container default are
    handled as well (dict keys are left untouched). Every other value is
    returned unchanged.
    """
    if val is None:
        return None
    if isinstance(val, dict):
        return {key: make_serializable(item) for key, item in val.items()}
    if isinstance(val, list):
        return [make_serializable(item) for item in val]
    if isinstance(val, tuple):
        return tuple(make_serializable(item) for item in val)
    if hasattr(val, 'value'):  # Enum
        return val.value
    # Enum-like objects that expose no ``.value``: fall back to the member name
    if hasattr(val, 'name') and 'Enum' in str(type(val)):
        return val.name
    return val
|
||||
|
||||
# Extract fields from the Pydantic model
|
||||
fields = {}
|
||||
for field_name, field_info in config_class.model_fields.items():
|
||||
field_type = field_info.annotation
|
||||
default = field_info.default
|
||||
default = make_serializable(field_info.default)
|
||||
description = field_info.description or ""
|
||||
|
||||
is_locked = field_name in LOCKED_FIELD_NAMES
|
||||
@@ -248,12 +276,15 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
|
||||
if hasattr(field_type, "__origin__"):
|
||||
type_name = str(field_type)
|
||||
|
||||
locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default)
|
||||
current_value = make_serializable(locked_value) if is_locked else default
|
||||
|
||||
fields[field_name] = {
|
||||
"type": type_name,
|
||||
"default": default if default is not None else None,
|
||||
"default": default,
|
||||
"description": description,
|
||||
"locked": is_locked,
|
||||
"current_value": LOCKED_FIELDS.get("env", {}).get(field_name, default) if is_locked else default,
|
||||
"current_value": current_value,
|
||||
}
|
||||
|
||||
return fields
|
||||
@@ -315,7 +346,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
|
||||
|
||||
trainer_log_file = open(trainer_log, "w")
|
||||
run_state.trainer_process = subprocess.Popen(
|
||||
["python", "launch_training.py", "--config", str(config_path)],
|
||||
[sys.executable, "launch_training.py", "--config", str(config_path)],
|
||||
stdout=trainer_log_file,
|
||||
stderr=subprocess.STDOUT,
|
||||
cwd=str(TINKER_ATROPOS_ROOT),
|
||||
@@ -355,7 +386,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
|
||||
|
||||
env_log_file = open(env_log, "w")
|
||||
run_state.env_process = subprocess.Popen(
|
||||
["python", str(env_info.file_path), "serve", "--config", str(config_path)],
|
||||
[sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
|
||||
stdout=env_log_file,
|
||||
stderr=subprocess.STDOUT,
|
||||
cwd=str(TINKER_ATROPOS_ROOT),
|
||||
@@ -543,17 +574,14 @@ async def rl_select_environment(name: str) -> str:
|
||||
if not field_info.get("locked", False):
|
||||
_current_config[field_name] = field_info.get("default")
|
||||
|
||||
configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False))
|
||||
locked_count = sum(1 for f in config_fields.values() if f.get("locked", False))
|
||||
# Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps
|
||||
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
|
||||
_current_config["wandb_name"] = f"{name}-{timestamp}"
|
||||
|
||||
return json.dumps({
|
||||
"message": f"Selected environment: {name}",
|
||||
"environment": name,
|
||||
"file_path": env_info.file_path,
|
||||
"configurable_fields": configurable_count,
|
||||
"locked_fields": locked_count,
|
||||
"config": _current_config,
|
||||
"tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.",
|
||||
}, indent=2)
|
||||
|
||||
|
||||
@@ -961,10 +989,11 @@ async def rl_list_runs() -> str:
|
||||
# ============================================================================
|
||||
|
||||
# Test models at different scales for robustness testing
|
||||
# These are cheap, capable models on OpenRouter for testing parsing/scoring
|
||||
TEST_MODELS = [
|
||||
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
|
||||
{"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"},
|
||||
{"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"},
|
||||
{"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
|
||||
{"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
|
||||
]
|
||||
|
||||
# Default test parameters - quick but representative
|
||||
@@ -1066,18 +1095,35 @@ async def rl_test_inference(
|
||||
|
||||
# Build the process command using Atropos's built-in CLI
|
||||
# This runs the environment's actual code with OpenRouter as the inference backend
|
||||
# We pass our locked settings + test-specific overrides via CLI args
|
||||
cmd = [
|
||||
"python", env_info.file_path, "process",
|
||||
sys.executable, env_info.file_path, "process",
|
||||
# Test-specific overrides
|
||||
"--env.total_steps", str(num_steps),
|
||||
"--env.group_size", str(group_size),
|
||||
"--env.use_wandb", "false",
|
||||
"--env.use_wandb", "false", # No wandb for quick tests
|
||||
"--env.data_path_to_save_groups", str(output_file),
|
||||
# Use locked settings from our config
|
||||
"--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"],
|
||||
"--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]),
|
||||
"--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]),
|
||||
"--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]),
|
||||
# OpenRouter config for inference testing
|
||||
# IMPORTANT: Use server_type=openai for OpenRouter (not sglang)
|
||||
# sglang is only for actual training with Tinker's inference server
|
||||
"--openai.base_url", "https://openrouter.ai/api/v1",
|
||||
"--openai.api_key", api_key,
|
||||
"--openai.model_name", model_id,
|
||||
"--openai.server_type", "openai", # OpenRouter is OpenAI-compatible
|
||||
"--openai.health_check", "false", # OpenRouter doesn't have health endpoint
|
||||
]
|
||||
|
||||
print(f"Running: python {Path(env_info.file_path).name} process ...")
|
||||
# Debug: Print the full command
|
||||
cmd_str = " ".join(str(c) for c in cmd)
|
||||
# Hide API key in printed output
|
||||
cmd_display = cmd_str.replace(api_key, "***API_KEY***")
|
||||
print(f"Command: {cmd_display}")
|
||||
print(f"Working dir: {TINKER_ATROPOS_ROOT}")
|
||||
print(f" {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
|
||||
|
||||
model_results = {
|
||||
@@ -1105,12 +1151,44 @@ async def rl_test_inference(
|
||||
timeout=600, # 10 minute timeout per model
|
||||
)
|
||||
|
||||
# Decode output
|
||||
stdout_text = stdout.decode() if stdout else ""
|
||||
stderr_text = stderr.decode() if stderr else ""
|
||||
|
||||
# Write logs to files for inspection outside CLI
|
||||
log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
|
||||
with open(log_file, "w") as f:
|
||||
f.write(f"Command: {cmd_display}\n")
|
||||
f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
|
||||
f.write(f"Return code: {process.returncode}\n")
|
||||
f.write(f"\n{'='*60}\n")
|
||||
f.write(f"STDOUT:\n{'='*60}\n")
|
||||
f.write(stdout_text or "(empty)\n")
|
||||
f.write(f"\n{'='*60}\n")
|
||||
f.write(f"STDERR:\n{'='*60}\n")
|
||||
f.write(stderr_text or "(empty)\n")
|
||||
|
||||
print(f" Log file: {log_file}")
|
||||
|
||||
# Print to console for immediate debugging
|
||||
if stdout_text.strip():
|
||||
print(f"\n--- STDOUT ---")
|
||||
print(stdout_text[-2000:]) # Last 2000 chars
|
||||
|
||||
if stderr_text.strip():
|
||||
print(f"\n--- STDERR ---")
|
||||
print(stderr_text[-2000:]) # Last 2000 chars
|
||||
|
||||
if process.returncode != 0:
|
||||
model_results["error"] = f"Process exited with code {process.returncode}"
|
||||
model_results["stderr"] = stderr.decode()[-1000:]
|
||||
print(f" Error: {model_results['error']}")
|
||||
model_results["stderr"] = stderr_text[-1000:]
|
||||
model_results["stdout"] = stdout_text[-1000:]
|
||||
model_results["log_file"] = str(log_file)
|
||||
print(f"\n ❌ Error: {model_results['error']}")
|
||||
else:
|
||||
print(f" Process completed successfully")
|
||||
print(f"\n ✅ Process completed successfully")
|
||||
print(f" Output file: {output_file}")
|
||||
print(f" File exists: {output_file.exists()}")
|
||||
|
||||
# Parse the output JSONL file
|
||||
if output_file.exists():
|
||||
|
||||
Reference in New Issue
Block a user