Update RL tools and enhance configuration management

- Modified `model_tools.py` to update default model IDs and add new RL function `rl_test_inference`.
- Enhanced `README.md` with installation instructions for submodules and updated API key usage.
- Improved `rl_cli.py` to load configuration from `~/.hermes/config.yaml` and set terminal working directory for RL tools.
- Updated `run_agent.py` to handle empty string arguments as empty objects for better JSON validation.
- Refined installation scripts to ensure submodules are cloned and installed correctly, enhancing setup experience.
This commit is contained in:
teknium1
2026-02-04 13:57:59 -08:00
parent 12bbca95ec
commit 3c0d0dba49
7 changed files with 274 additions and 56 deletions

View File

@@ -15,7 +15,7 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins
```
The installer will:
- Clone to `~/.hermes-agent`
- Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos)
- Create a virtual environment
- Install all dependencies
- Run the interactive setup wizard
@@ -281,18 +281,10 @@ Train language models with reinforcement learning using the Tinker API and Atrop
```bash
TINKER_API_KEY=your-tinker-key # Get from https://tinker-console.thinkingmachines.ai/keys
WANDB_API_KEY=your-wandb-key # Get from https://wandb.ai/authorize
OPENROUTER_API_KEY=your-key # Optional: for rl_test_inference
```
2. **Install tinker-atropos:** (in a separate directory)
```bash
cd ~/tinker-atropos
pip install -e .
```
3. **Start the RL API server:**
```bash
rl-server # Runs on port 8080 by default
```
2. **That's it!** tinker-atropos is included as a submodule - no separate installation needed.
#### Using RL Tools
@@ -313,10 +305,12 @@ Agent: I'll set up an RL training run on the GSM8k environment...
| `rl_select_environment` | Select an environment for training |
| `rl_get_current_config` | View all configurable options |
| `rl_edit_config` | Change a configuration value |
| `rl_test_inference` | Test environment with OpenRouter (pre-training validation) |
| `rl_start_training` | Start a training run |
| `rl_check_status` | Check training progress |
| `rl_stop_training` | Stop a running training |
| `rl_get_results` | Fetch WandB metrics |
| `rl_list_runs` | List active training runs |
#### Dedicated RL CLI
@@ -434,7 +428,7 @@ skills/
If you prefer not to use the installer:
```bash
# Clone the repository
# Clone the repository (with submodules)
git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git
cd hermes-agent
@@ -445,6 +439,11 @@ cd hermes-agent
python3 -m venv venv
source venv/bin/activate
pip install -e ".[all]"
# Install submodules (required for terminal and RL tools)
pip install -e "./mini-swe-agent" # Terminal tool backend
pip install -e "./tinker-atropos" # RL training backend
hermes setup
```

View File

@@ -665,7 +665,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
"models": {
"type": "array",
"items": {"type": "string"},
"description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1"
"description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1"
}
},
"required": []
@@ -730,7 +730,7 @@ def get_all_tool_names() -> List[str]:
"rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results",
"rl_list_runs"
"rl_list_runs", "rl_test_inference"
])
return tool_names
@@ -898,7 +898,7 @@ def get_tool_definitions(
"rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results",
"rl_list_runs"
"rl_list_runs", "rl_test_inference"
]
}
legacy_tools = legacy_map.get(toolset_name, [])
@@ -950,7 +950,7 @@ def get_tool_definitions(
"rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results",
"rl_list_runs"
"rl_list_runs", "rl_test_inference"
]
}
legacy_tools = legacy_map.get(toolset_name, [])
@@ -1407,7 +1407,7 @@ def handle_function_call(
"rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results",
"rl_list_runs"
"rl_list_runs", "rl_test_inference"
]:
return handle_rl_function_call(function_name, function_args)

View File

@@ -25,14 +25,34 @@ import sys
from pathlib import Path
import fire
import yaml
# Load environment variables from .env file
from dotenv import load_dotenv
env_path = Path(__file__).parent / '.env'
if env_path.exists():
load_dotenv(dotenv_path=env_path)
print(f"✅ Loaded environment variables from {env_path}")
# Load from ~/.hermes/.env first, then local .env
hermes_env_path = Path.home() / '.hermes' / '.env'
local_env_path = Path(__file__).parent / '.env'
if hermes_env_path.exists():
load_dotenv(dotenv_path=hermes_env_path)
print(f"✅ Loaded environment variables from {hermes_env_path}")
elif local_env_path.exists():
load_dotenv(dotenv_path=local_env_path)
print(f"✅ Loaded environment variables from {local_env_path}")
# Set terminal working directory to tinker-atropos submodule
# This ensures terminal commands run in the right context for RL work
tinker_atropos_dir = Path(__file__).parent / 'tinker-atropos'
if tinker_atropos_dir.exists():
os.environ['TERMINAL_CWD'] = str(tinker_atropos_dir)
os.environ['HERMES_QUIET'] = '1' # Disable temp subdirectory creation
print(f"📂 Terminal working directory: {tinker_atropos_dir}")
else:
# Fall back to hermes-agent directory if submodule not found
os.environ['TERMINAL_CWD'] = str(Path(__file__).parent)
os.environ['HERMES_QUIET'] = '1'
print(f"⚠️ tinker-atropos submodule not found, using: {Path(__file__).parent}")
# Import agent and tools
from run_agent import AIAgent
@@ -40,6 +60,50 @@ from model_tools import get_tool_definitions, check_toolset_requirements
from tools.rl_training_tool import check_rl_api_keys, get_missing_keys
# ============================================================================
# Config Loading
# ============================================================================
# Fallbacks used when ~/.hermes/config.yaml is absent or incomplete.
DEFAULT_MODEL = "anthropic/claude-opus-4.5"
DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"


def load_hermes_config() -> dict:
    """
    Load agent configuration from ``~/.hermes/config.yaml``.

    A missing file, unreadable YAML, or an unexpected document shape all
    fall back to the module defaults rather than raising, so callers can
    rely on the returned keys always being present.

    Returns:
        dict: Configuration containing at least ``model`` and ``base_url``.
    """
    config_path = Path.home() / '.hermes' / 'config.yaml'
    config = {
        "model": DEFAULT_MODEL,
        "base_url": DEFAULT_BASE_URL,
    }

    if config_path.exists():
        try:
            with open(config_path, "r") as f:
                file_config = yaml.safe_load(f) or {}

            # A YAML document can legally be a scalar or a list; only a
            # mapping is meaningful here. Discard anything else explicitly
            # instead of letting `"model" in <str>` do a substring match
            # and then tripping the broad except with a TypeError.
            if not isinstance(file_config, dict):
                file_config = {}

            # "model" may be a plain string or a mapping with a "default" key.
            if "model" in file_config:
                if isinstance(file_config["model"], str):
                    config["model"] = file_config["model"]
                elif isinstance(file_config["model"], dict):
                    config["model"] = file_config["model"].get("default", DEFAULT_MODEL)

            # Optional override of the API endpoint.
            if "base_url" in file_config:
                config["base_url"] = file_config["base_url"]
        except Exception as e:
            # Best-effort: a malformed config must not prevent startup.
            print(f"⚠️ Warning: Failed to load config.yaml: {e}")

    return config
# ============================================================================
# RL-Specific Configuration
# ============================================================================
@@ -108,7 +172,7 @@ When asked to train a model, follow this workflow:
"""
# Toolsets to enable for RL workflows
RL_TOOLSETS = ["base", "terminal", "web", "rl"]
RL_TOOLSETS = ["terminal", "web", "rl"]
# ============================================================================
@@ -172,9 +236,9 @@ def list_environments_sync():
def main(
task: str = None,
model: str = "anthropic/claude-sonnet-4-20250514",
model: str = None,
api_key: str = None,
base_url: str = "https://openrouter.ai/api/v1",
base_url: str = None,
max_iterations: int = RL_MAX_ITERATIONS,
interactive: bool = False,
list_environments: bool = False,
@@ -187,9 +251,9 @@ def main(
Args:
task: The training task/goal (e.g., "Train a model on GSM8k for math")
model: Model to use for the agent (default: claude-sonnet-4)
model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided)
api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided)
base_url: API base URL (default: OpenRouter)
base_url: API base URL (reads from config or defaults to OpenRouter)
max_iterations: Maximum agent iterations (default: 200 for long workflows)
interactive: Run in interactive mode (multiple conversations)
list_environments: Just list available RL environments and exit
@@ -210,6 +274,15 @@ def main(
# Check server status
python rl_cli.py --check-server
"""
# Load config from ~/.hermes/config.yaml
config = load_hermes_config()
# Use config values if not explicitly provided
if model is None:
model = config["model"]
if base_url is None:
base_url = config["base_url"]
print("🎯 RL Training Agent")
print("=" * 60)

View File

@@ -1764,10 +1764,16 @@ class AIAgent:
self._invalid_tool_retries = 0
# Validate tool call arguments are valid JSON
# Handle empty strings as empty objects (common model quirk)
invalid_json_args = []
for tc in assistant_message.tool_calls:
args = tc.function.arguments
# Treat empty/whitespace strings as empty object
if not args or not args.strip():
tc.function.arguments = "{}"
continue
try:
json.loads(tc.function.arguments)
json.loads(args)
except json.JSONDecodeError as e:
invalid_json_args.append((tc.function.name, str(e)))

View File

@@ -150,14 +150,15 @@ function Install-Repository {
}
} else {
# Try SSH first (for private repo access), fall back to HTTPS
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
Write-Info "Trying SSH clone..."
$sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1
$sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Success "Cloned via SSH"
} else {
Write-Info "SSH failed, trying HTTPS..."
$httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1
$httpsResult = git clone --branch $Branch --recurse-submodules $RepoUrlHttps $InstallDir 2>&1
if ($LASTEXITCODE -eq 0) {
Write-Success "Cloned via HTTPS"
@@ -171,6 +172,13 @@ function Install-Repository {
}
}
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
Push-Location $InstallDir
git submodule update --init --recursive
Pop-Location
Write-Success "Submodules ready"
Write-Success "Repository ready"
}
@@ -208,15 +216,43 @@ function Install-Dependencies {
& .\venv\Scripts\Activate.ps1
}
# Install main package
try {
pip install -e ".[all]" 2>&1 | Out-Null
} catch {
pip install -e "." | Out-Null
}
Write-Success "Main package installed"
# Install submodules
Write-Info "Installing mini-swe-agent (terminal tool backend)..."
if (Test-Path "mini-swe-agent\pyproject.toml") {
try {
pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
Write-Success "mini-swe-agent installed"
} catch {
Write-Warning "mini-swe-agent install failed (terminal tools may not work)"
}
} else {
Write-Warning "mini-swe-agent not found (run: git submodule update --init)"
}
Write-Info "Installing tinker-atropos (RL training backend)..."
if (Test-Path "tinker-atropos\pyproject.toml") {
try {
pip install -e ".\tinker-atropos" 2>&1 | Out-Null
Write-Success "tinker-atropos installed"
} catch {
Write-Warning "tinker-atropos install failed (RL tools may not work)"
}
} else {
Write-Warning "tinker-atropos not found (run: git submodule update --init)"
}
Pop-Location
Write-Success "Dependencies installed"
Write-Success "All dependencies installed"
}
function Set-PathVariable {

View File

@@ -292,12 +292,13 @@ clone_repo() {
fi
else
# Try SSH first (for private repo access), fall back to HTTPS
# Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos
log_info "Trying SSH clone..."
if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then
log_success "Cloned via SSH"
else
log_info "SSH failed, trying HTTPS..."
if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_HTTPS" "$INSTALL_DIR"; then
log_success "Cloned via HTTPS"
else
log_error "Failed to clone repository"
@@ -310,6 +311,12 @@ clone_repo() {
fi
cd "$INSTALL_DIR"
# Ensure submodules are initialized and updated (for existing installs or if --recurse failed)
log_info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
git submodule update --init --recursive
log_success "Submodules ready"
log_success "Repository ready"
}
@@ -343,10 +350,29 @@ install_deps() {
source venv/bin/activate
fi
# Install the package in editable mode with all extras
# Install the main package in editable mode with all extras
pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null
log_success "Dependencies installed"
log_success "Main package installed"
# Install submodules
log_info "Installing mini-swe-agent (terminal tool backend)..."
if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
pip install -e "./mini-swe-agent" > /dev/null 2>&1 || log_warn "mini-swe-agent install failed (terminal tools may not work)"
log_success "mini-swe-agent installed"
else
log_warn "mini-swe-agent not found (run: git submodule update --init)"
fi
log_info "Installing tinker-atropos (RL training backend)..."
if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)"
log_success "tinker-atropos installed"
else
log_warn "tinker-atropos not found (run: git submodule update --init)"
fi
log_success "All dependencies installed"
}
setup_path() {

View File

@@ -37,6 +37,7 @@ import subprocess
import sys
import time
import uuid
from datetime import datetime
import yaml
from dataclasses import dataclass, field
from pathlib import Path
@@ -84,6 +85,7 @@ LOCKED_FIELDS = {
"weight": 1.0,
"num_requests_for_eval": 256,
"timeout": 3600,
"server_type": "sglang", # Tinker uses sglang for actual training
}
],
"tinker": {
@@ -211,6 +213,9 @@ def _scan_environments() -> List[EnvironmentInfo]:
def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
"""
Dynamically import an environment and extract its config fields.
Uses config_init() to get the actual config class, with fallback to
directly importing BaseEnvConfig if config_init fails.
"""
try:
# Load the environment module
@@ -230,15 +235,38 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
if not env_class:
return {}
# Call config_init to get the actual config
# Try calling config_init to get the actual config class
config_class = None
try:
env_config, server_configs = env_class.config_init()
config_class = type(env_config)
except Exception as config_error:
# Fallback: try to import BaseEnvConfig directly from atroposlib
print(f"Note: config_init failed ({config_error}), using BaseEnvConfig defaults")
try:
from atroposlib.envs.base import BaseEnvConfig
config_class = BaseEnvConfig
except ImportError:
return {}
if not config_class:
return {}
# Helper to make values JSON-serializable (handle enums, etc.)
def make_serializable(val):
if val is None:
return None
if hasattr(val, 'value'): # Enum
return val.value
if hasattr(val, 'name') and hasattr(val, '__class__') and 'Enum' in str(type(val)):
return val.name
return val
# Extract fields from the Pydantic model
fields = {}
for field_name, field_info in config_class.model_fields.items():
field_type = field_info.annotation
default = field_info.default
default = make_serializable(field_info.default)
description = field_info.description or ""
is_locked = field_name in LOCKED_FIELD_NAMES
@@ -248,12 +276,15 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]:
if hasattr(field_type, "__origin__"):
type_name = str(field_type)
locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default)
current_value = make_serializable(locked_value) if is_locked else default
fields[field_name] = {
"type": type_name,
"default": default if default is not None else None,
"default": default,
"description": description,
"locked": is_locked,
"current_value": LOCKED_FIELDS.get("env", {}).get(field_name, default) if is_locked else default,
"current_value": current_value,
}
return fields
@@ -315,7 +346,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
trainer_log_file = open(trainer_log, "w")
run_state.trainer_process = subprocess.Popen(
["python", "launch_training.py", "--config", str(config_path)],
[sys.executable, "launch_training.py", "--config", str(config_path)],
stdout=trainer_log_file,
stderr=subprocess.STDOUT,
cwd=str(TINKER_ATROPOS_ROOT),
@@ -355,7 +386,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path):
env_log_file = open(env_log, "w")
run_state.env_process = subprocess.Popen(
["python", str(env_info.file_path), "serve", "--config", str(config_path)],
[sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)],
stdout=env_log_file,
stderr=subprocess.STDOUT,
cwd=str(TINKER_ATROPOS_ROOT),
@@ -543,17 +574,14 @@ async def rl_select_environment(name: str) -> str:
if not field_info.get("locked", False):
_current_config[field_name] = field_info.get("default")
configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False))
locked_count = sum(1 for f in config_fields.values() if f.get("locked", False))
# Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
_current_config["wandb_name"] = f"{name}-{timestamp}"
return json.dumps({
"message": f"Selected environment: {name}",
"environment": name,
"file_path": env_info.file_path,
"configurable_fields": configurable_count,
"locked_fields": locked_count,
"config": _current_config,
"tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.",
}, indent=2)
@@ -961,10 +989,11 @@ async def rl_list_runs() -> str:
# ============================================================================
# Test models at different scales for robustness testing
# These are cheap, capable models on OpenRouter for testing parsing/scoring
TEST_MODELS = [
{"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"},
{"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"},
{"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"},
{"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"},
{"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"},
]
# Default test parameters - quick but representative
@@ -1066,18 +1095,35 @@ async def rl_test_inference(
# Build the process command using Atropos's built-in CLI
# This runs the environment's actual code with OpenRouter as the inference backend
# We pass our locked settings + test-specific overrides via CLI args
cmd = [
"python", env_info.file_path, "process",
sys.executable, env_info.file_path, "process",
# Test-specific overrides
"--env.total_steps", str(num_steps),
"--env.group_size", str(group_size),
"--env.use_wandb", "false",
"--env.use_wandb", "false", # No wandb for quick tests
"--env.data_path_to_save_groups", str(output_file),
# Use locked settings from our config
"--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"],
"--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]),
"--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]),
"--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]),
# OpenRouter config for inference testing
# IMPORTANT: Use server_type=openai for OpenRouter (not sglang)
# sglang is only for actual training with Tinker's inference server
"--openai.base_url", "https://openrouter.ai/api/v1",
"--openai.api_key", api_key,
"--openai.model_name", model_id,
"--openai.server_type", "openai", # OpenRouter is OpenAI-compatible
"--openai.health_check", "false", # OpenRouter doesn't have health endpoint
]
print(f"Running: python {Path(env_info.file_path).name} process ...")
# Debug: Print the full command
cmd_str = " ".join(str(c) for c in cmd)
# Hide API key in printed output
cmd_display = cmd_str.replace(api_key, "***API_KEY***")
print(f"Command: {cmd_display}")
print(f"Working dir: {TINKER_ATROPOS_ROOT}")
print(f" {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts")
model_results = {
@@ -1105,12 +1151,44 @@ async def rl_test_inference(
timeout=600, # 10 minute timeout per model
)
# Decode output
stdout_text = stdout.decode() if stdout else ""
stderr_text = stderr.decode() if stderr else ""
# Write logs to files for inspection outside CLI
log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log"
with open(log_file, "w") as f:
f.write(f"Command: {cmd_display}\n")
f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n")
f.write(f"Return code: {process.returncode}\n")
f.write(f"\n{'='*60}\n")
f.write(f"STDOUT:\n{'='*60}\n")
f.write(stdout_text or "(empty)\n")
f.write(f"\n{'='*60}\n")
f.write(f"STDERR:\n{'='*60}\n")
f.write(stderr_text or "(empty)\n")
print(f" Log file: {log_file}")
# Print to console for immediate debugging
if stdout_text.strip():
print(f"\n--- STDOUT ---")
print(stdout_text[-2000:]) # Last 2000 chars
if stderr_text.strip():
print(f"\n--- STDERR ---")
print(stderr_text[-2000:]) # Last 2000 chars
if process.returncode != 0:
model_results["error"] = f"Process exited with code {process.returncode}"
model_results["stderr"] = stderr.decode()[-1000:]
print(f" Error: {model_results['error']}")
model_results["stderr"] = stderr_text[-1000:]
model_results["stdout"] = stdout_text[-1000:]
model_results["log_file"] = str(log_file)
print(f"\n ❌ Error: {model_results['error']}")
else:
print(f" Process completed successfully")
print(f"\n Process completed successfully")
print(f" Output file: {output_file}")
print(f" File exists: {output_file.exists()}")
# Parse the output JSONL file
if output_file.exists():