diff --git a/README.md b/README.md index f49ae26af..a1673c912 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ irm https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/ins ``` The installer will: -- Clone to `~/.hermes-agent` +- Clone to `~/.hermes-agent` (with submodules: mini-swe-agent, tinker-atropos) - Create a virtual environment - Install all dependencies - Run the interactive setup wizard @@ -281,18 +281,10 @@ Train language models with reinforcement learning using the Tinker API and Atrop ```bash TINKER_API_KEY=your-tinker-key # Get from https://tinker-console.thinkingmachines.ai/keys WANDB_API_KEY=your-wandb-key # Get from https://wandb.ai/authorize +OPENROUTER_API_KEY=your-key # Optional: for rl_test_inference ``` -2. **Install tinker-atropos:** (in a separate directory) -```bash -cd ~/tinker-atropos -pip install -e . -``` - -3. **Start the RL API server:** -```bash -rl-server # Runs on port 8080 by default -``` +2. **That's it!** tinker-atropos is included as a submodule - no separate installation needed. #### Using RL Tools @@ -313,10 +305,12 @@ Agent: I'll set up an RL training run on the GSM8k environment... | `rl_select_environment` | Select an environment for training | | `rl_get_current_config` | View all configurable options | | `rl_edit_config` | Change a configuration value | +| `rl_test_inference` | Test environment with OpenRouter (pre-training validation) | | `rl_start_training` | Start a training run | | `rl_check_status` | Check training progress | | `rl_stop_training` | Stop a running training | | `rl_get_results` | Fetch WandB metrics | +| `rl_list_runs` | List active training runs | #### Dedicated RL CLI @@ -434,7 +428,7 @@ skills/ If you prefer not to use the installer: ```bash -# Clone the repository +# Clone the repository (with submodules) git clone --recurse-submodules https://github.com/NousResearch/hermes-agent.git cd hermes-agent @@ -445,6 +439,11 @@ cd hermes-agent python3 -m venv venv source venv/bin/activate pip install -e ".[all]" + +# Install submodules (required for terminal and RL tools) +pip install -e "./mini-swe-agent" # Terminal tool backend +pip install -e "./tinker-atropos" # RL training backend + hermes setup ``` diff --git a/model_tools.py b/model_tools.py index 847e56eff..e95a595c8 100644 --- a/model_tools.py +++ b/model_tools.py @@ -665,7 +665,7 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]: "models": { "type": "array", "items": {"type": "string"}, - "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, zhipu-ai/glm-4-flash, minimax/minimax-m1" + "description": "Optional list of OpenRouter model IDs. Default: qwen/qwen3-8b, z-ai/glm-4.7-flash, minimax/minimax-m2.1" } }, "required": [] @@ -730,7 +730,7 @@ def get_all_tool_names() -> List[str]: "rl_get_current_config", "rl_edit_config", "rl_start_training", "rl_check_status", "rl_stop_training", "rl_get_results", - "rl_list_runs" + "rl_list_runs", "rl_test_inference" ]) return tool_names @@ -898,7 +898,7 @@ def get_tool_definitions( "rl_get_current_config", "rl_edit_config", "rl_start_training", "rl_check_status", "rl_stop_training", "rl_get_results", - "rl_list_runs" + "rl_list_runs", "rl_test_inference" ] } legacy_tools = legacy_map.get(toolset_name, []) @@ -950,7 +950,7 @@ def get_tool_definitions( "rl_get_current_config", "rl_edit_config", "rl_start_training", "rl_check_status", "rl_stop_training", "rl_get_results", - "rl_list_runs" + "rl_list_runs", "rl_test_inference" ] } legacy_tools = legacy_map.get(toolset_name, []) @@ -1407,7 +1407,7 @@ def handle_function_call( "rl_get_current_config", "rl_edit_config", "rl_start_training", "rl_check_status", "rl_stop_training", "rl_get_results", - "rl_list_runs" + "rl_list_runs", "rl_test_inference" ]: return handle_rl_function_call(function_name, function_args) diff --git a/rl_cli.py b/rl_cli.py index fe0eecfd4..a45c365b4 100644 --- a/rl_cli.py +++ b/rl_cli.py @@ -25,14 +25,34 @@ import sys from pathlib import Path import fire +import yaml # Load environment variables from .env file from dotenv import load_dotenv -env_path = Path(__file__).parent / '.env' -if env_path.exists(): - load_dotenv(dotenv_path=env_path) - print(f"✅ Loaded environment variables from {env_path}") +# Load from ~/.hermes/.env first, then local .env +hermes_env_path = Path.home() / '.hermes' / '.env' +local_env_path = Path(__file__).parent / '.env' + +if hermes_env_path.exists(): + load_dotenv(dotenv_path=hermes_env_path) + print(f"✅ Loaded environment variables from {hermes_env_path}") +elif local_env_path.exists(): + load_dotenv(dotenv_path=local_env_path) + print(f"✅ Loaded environment variables from {local_env_path}") + +# Set terminal working directory to tinker-atropos submodule +# This ensures terminal commands run in the right context for RL work +tinker_atropos_dir = Path(__file__).parent / 'tinker-atropos' +if tinker_atropos_dir.exists(): + os.environ['TERMINAL_CWD'] = str(tinker_atropos_dir) + os.environ['HERMES_QUIET'] = '1' # Disable temp subdirectory creation + print(f"📂 Terminal working directory: {tinker_atropos_dir}") +else: + # Fall back to hermes-agent directory if submodule not found + os.environ['TERMINAL_CWD'] = str(Path(__file__).parent) + os.environ['HERMES_QUIET'] = '1' + print(f"⚠️ tinker-atropos submodule not found, using: {Path(__file__).parent}") # Import agent and tools from run_agent import AIAgent @@ -40,6 +60,50 @@ from model_tools import get_tool_definitions, check_toolset_requirements from tools.rl_training_tool import check_rl_api_keys, get_missing_keys +# ============================================================================ +# Config Loading +# ============================================================================ + +DEFAULT_MODEL = "anthropic/claude-opus-4.5" +DEFAULT_BASE_URL = "https://openrouter.ai/api/v1" + + +def load_hermes_config() -> dict: + """ + Load configuration from ~/.hermes/config.yaml. + + Returns: + dict: Configuration with model, base_url, etc. + """ + config_path = Path.home() / '.hermes' / 'config.yaml' + + config = { + "model": DEFAULT_MODEL, + "base_url": DEFAULT_BASE_URL, + } + + if config_path.exists(): + try: + with open(config_path, "r") as f: + file_config = yaml.safe_load(f) or {} + + # Get model from config + if "model" in file_config: + if isinstance(file_config["model"], str): + config["model"] = file_config["model"] + elif isinstance(file_config["model"], dict): + config["model"] = file_config["model"].get("default", DEFAULT_MODEL) + + # Get base_url if specified + if "base_url" in file_config: + config["base_url"] = file_config["base_url"] + + except Exception as e: + print(f"⚠️ Warning: Failed to load config.yaml: {e}") + + return config + + # ============================================================================ # RL-Specific Configuration # ============================================================================ @@ -108,7 +172,7 @@ When asked to train a model, follow this workflow: """ # Toolsets to enable for RL workflows -RL_TOOLSETS = ["base", "terminal", "web", "rl"] +RL_TOOLSETS = ["terminal", "web", "rl"] # ============================================================================ @@ -172,9 +236,9 @@ def list_environments_sync(): def main( task: str = None, - model: str = "anthropic/claude-sonnet-4-20250514", + model: str = None, api_key: str = None, - base_url: str = "https://openrouter.ai/api/v1", + base_url: str = None, max_iterations: int = RL_MAX_ITERATIONS, interactive: bool = False, list_environments: bool = False, @@ -187,9 +251,9 @@ def main( Args: task: The training task/goal (e.g., "Train a model on GSM8k for math") - model: Model to use for the agent (default: claude-sonnet-4) + model: Model to use for the agent (reads from ~/.hermes/config.yaml if not provided) api_key: OpenRouter API key (uses OPENROUTER_API_KEY env var if not provided) - base_url: API base URL (default: OpenRouter) + base_url: API base URL (reads from config or defaults to OpenRouter) max_iterations: Maximum agent iterations (default: 200 for long workflows) interactive: Run in interactive mode (multiple conversations) list_environments: Just list available RL environments and exit @@ -210,6 +274,15 @@ def main( # Check server status python rl_cli.py --check-server """ + # Load config from ~/.hermes/config.yaml + config = load_hermes_config() + + # Use config values if not explicitly provided + if model is None: + model = config["model"] + if base_url is None: + base_url = config["base_url"] + print("🎯 RL Training Agent") print("=" * 60) diff --git a/run_agent.py b/run_agent.py index 7b70289ff..1aceb5b58 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1764,10 +1764,16 @@ class AIAgent: self._invalid_tool_retries = 0 # Validate tool call arguments are valid JSON + # Handle empty strings as empty objects (common model quirk) invalid_json_args = [] for tc in assistant_message.tool_calls: + args = tc.function.arguments + # Treat empty/whitespace strings as empty object + if not args or not args.strip(): + tc.function.arguments = "{}" + continue try: - json.loads(tc.function.arguments) + json.loads(args) except json.JSONDecodeError as e: invalid_json_args.append((tc.function.name, str(e))) diff --git a/scripts/install.ps1 b/scripts/install.ps1 index caf80288d..3666b21b5 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -150,14 +150,15 @@ function Install-Repository { } } else { # Try SSH first (for private repo access), fall back to HTTPS + # Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos Write-Info "Trying SSH clone..." - $sshResult = git clone --branch $Branch $RepoUrlSsh $InstallDir 2>&1 + $sshResult = git clone --branch $Branch --recurse-submodules $RepoUrlSsh $InstallDir 2>&1 if ($LASTEXITCODE -eq 0) { Write-Success "Cloned via SSH" } else { Write-Info "SSH failed, trying HTTPS..." - $httpsResult = git clone --branch $Branch $RepoUrlHttps $InstallDir 2>&1 + $httpsResult = git clone --branch $Branch --recurse-submodules $RepoUrlHttps $InstallDir 2>&1 if ($LASTEXITCODE -eq 0) { Write-Success "Cloned via HTTPS" @@ -171,6 +172,13 @@ function Install-Repository { } } + # Ensure submodules are initialized and updated (for existing installs or if --recurse failed) + Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..." + Push-Location $InstallDir + git submodule update --init --recursive + Pop-Location + Write-Success "Submodules ready" + Write-Success "Repository ready" } @@ -208,15 +216,43 @@ function Install-Dependencies { & .\venv\Scripts\Activate.ps1 } + # Install main package try { pip install -e ".[all]" 2>&1 | Out-Null } catch { pip install -e "." | Out-Null } + Write-Success "Main package installed" + + # Install submodules + Write-Info "Installing mini-swe-agent (terminal tool backend)..." + if (Test-Path "mini-swe-agent\pyproject.toml") { + try { + pip install -e ".\mini-swe-agent" 2>&1 | Out-Null + Write-Success "mini-swe-agent installed" + } catch { + Write-Warning "mini-swe-agent install failed (terminal tools may not work)" + } + } else { + Write-Warning "mini-swe-agent not found (run: git submodule update --init)" + } + + Write-Info "Installing tinker-atropos (RL training backend)..." + if (Test-Path "tinker-atropos\pyproject.toml") { + try { + pip install -e ".\tinker-atropos" 2>&1 | Out-Null + Write-Success "tinker-atropos installed" + } catch { + Write-Warning "tinker-atropos install failed (RL tools may not work)" + } + } else { + Write-Warning "tinker-atropos not found (run: git submodule update --init)" + } + Pop-Location - Write-Success "Dependencies installed" + Write-Success "All dependencies installed" } function Set-PathVariable { diff --git a/scripts/install.sh b/scripts/install.sh index 463a0d5be..4b8affaa6 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -292,12 +292,13 @@ clone_repo() { fi else # Try SSH first (for private repo access), fall back to HTTPS + # Use --recurse-submodules to also clone mini-swe-agent and tinker-atropos log_info "Trying SSH clone..." - if git clone --branch "$BRANCH" "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then + if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_SSH" "$INSTALL_DIR" 2>/dev/null; then log_success "Cloned via SSH" else log_info "SSH failed, trying HTTPS..." - if git clone --branch "$BRANCH" "$REPO_URL_HTTPS" "$INSTALL_DIR"; then + if git clone --branch "$BRANCH" --recurse-submodules "$REPO_URL_HTTPS" "$INSTALL_DIR"; then log_success "Cloned via HTTPS" else log_error "Failed to clone repository" @@ -310,6 +311,12 @@ clone_repo() { fi cd "$INSTALL_DIR" + + # Ensure submodules are initialized and updated (for existing installs or if --recurse failed) + log_info "Initializing submodules (mini-swe-agent, tinker-atropos)..." + git submodule update --init --recursive + log_success "Submodules ready" + log_success "Repository ready" } @@ -343,10 +350,29 @@ install_deps() { source venv/bin/activate fi - # Install the package in editable mode with all extras + # Install the main package in editable mode with all extras pip install -e ".[all]" > /dev/null 2>&1 || pip install -e "." > /dev/null - log_success "Dependencies installed" + log_success "Main package installed" + + # Install submodules + log_info "Installing mini-swe-agent (terminal tool backend)..." + if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then + pip install -e "./mini-swe-agent" > /dev/null 2>&1 || log_warn "mini-swe-agent install failed (terminal tools may not work)" + log_success "mini-swe-agent installed" + else + log_warn "mini-swe-agent not found (run: git submodule update --init)" + fi + + log_info "Installing tinker-atropos (RL training backend)..." + if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then + pip install -e "./tinker-atropos" > /dev/null 2>&1 || log_warn "tinker-atropos install failed (RL tools may not work)" + log_success "tinker-atropos installed" + else + log_warn "tinker-atropos not found (run: git submodule update --init)" + fi + + log_success "All dependencies installed" } setup_path() { diff --git a/tools/rl_training_tool.py b/tools/rl_training_tool.py index 3c257c4c5..8c18bee67 100644 --- a/tools/rl_training_tool.py +++ b/tools/rl_training_tool.py @@ -37,6 +37,7 @@ import subprocess import sys import time import uuid +from datetime import datetime import yaml from dataclasses import dataclass, field from pathlib import Path @@ -84,6 +85,7 @@ LOCKED_FIELDS = { "weight": 1.0, "num_requests_for_eval": 256, "timeout": 3600, + "server_type": "sglang", # Tinker uses sglang for actual training } ], "tinker": { @@ -211,6 +213,9 @@ def _scan_environments() -> List[EnvironmentInfo]: def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]: """ Dynamically import an environment and extract its config fields. + + Uses config_init() to get the actual config class, with fallback to + directly importing BaseEnvConfig if config_init fails. """ try: # Load the environment module @@ -230,15 +235,38 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]: if not env_class: return {} - # Call config_init to get the actual config - env_config, server_configs = env_class.config_init() - config_class = type(env_config) + # Try calling config_init to get the actual config class + config_class = None + try: + env_config, server_configs = env_class.config_init() + config_class = type(env_config) + except Exception as config_error: + # Fallback: try to import BaseEnvConfig directly from atroposlib + print(f"Note: config_init failed ({config_error}), using BaseEnvConfig defaults") + try: + from atroposlib.envs.base import BaseEnvConfig + config_class = BaseEnvConfig + except ImportError: + return {} + + if not config_class: + return {} + + # Helper to make values JSON-serializable (handle enums, etc.) + def make_serializable(val): + if val is None: + return None + if hasattr(val, 'value'): # Enum + return val.value + if hasattr(val, 'name') and hasattr(val, '__class__') and 'Enum' in str(type(val)): + return val.name + return val # Extract fields from the Pydantic model fields = {} for field_name, field_info in config_class.model_fields.items(): field_type = field_info.annotation - default = field_info.default + default = make_serializable(field_info.default) description = field_info.description or "" is_locked = field_name in LOCKED_FIELD_NAMES @@ -248,12 +276,15 @@ def _get_env_config_fields(env_file_path: str) -> Dict[str, Dict[str, Any]]: if hasattr(field_type, "__origin__"): type_name = str(field_type) + locked_value = LOCKED_FIELDS.get("env", {}).get(field_name, default) + current_value = make_serializable(locked_value) if is_locked else default + fields[field_name] = { "type": type_name, - "default": default if default is not None else None, + "default": default, "description": description, "locked": is_locked, - "current_value": LOCKED_FIELDS.get("env", {}).get(field_name, default) if is_locked else default, + "current_value": current_value, } return fields @@ -315,7 +346,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path): trainer_log_file = open(trainer_log, "w") run_state.trainer_process = subprocess.Popen( - ["python", "launch_training.py", "--config", str(config_path)], + [sys.executable, "launch_training.py", "--config", str(config_path)], stdout=trainer_log_file, stderr=subprocess.STDOUT, cwd=str(TINKER_ATROPOS_ROOT), @@ -355,7 +386,7 @@ async def _spawn_training_run(run_state: RunState, config_path: Path): env_log_file = open(env_log, "w") run_state.env_process = subprocess.Popen( - ["python", str(env_info.file_path), "serve", "--config", str(config_path)], + [sys.executable, str(env_info.file_path), "serve", "--config", str(config_path)], stdout=env_log_file, stderr=subprocess.STDOUT, cwd=str(TINKER_ATROPOS_ROOT), @@ -543,17 +574,14 @@ async def rl_select_environment(name: str) -> str: if not field_info.get("locked", False): _current_config[field_name] = field_info.get("default") - configurable_count = sum(1 for f in config_fields.values() if not f.get("locked", False)) - locked_count = sum(1 for f in config_fields.values() if f.get("locked", False)) + # Auto-set wandb_name to "{env_name}-DATETIME" to avoid overlaps + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + _current_config["wandb_name"] = f"{name}-{timestamp}" return json.dumps({ "message": f"Selected environment: {name}", "environment": name, "file_path": env_info.file_path, - "configurable_fields": configurable_count, - "locked_fields": locked_count, - "config": _current_config, - "tip": f"Use rl_get_current_config() to see all {configurable_count} configurable fields.", }, indent=2) @@ -961,10 +989,11 @@ async def rl_list_runs() -> str: # ============================================================================ # Test models at different scales for robustness testing +# These are cheap, capable models on OpenRouter for testing parsing/scoring TEST_MODELS = [ {"id": "qwen/qwen3-8b", "name": "Qwen3 8B", "scale": "small"}, - {"id": "zhipu-ai/glm-4-flash", "name": "GLM-4 Flash", "scale": "medium"}, - {"id": "minimax/minimax-m1", "name": "MiniMax M1", "scale": "large"}, + {"id": "z-ai/glm-4.7-flash", "name": "GLM-4.7 Flash", "scale": "medium"}, + {"id": "minimax/minimax-m2.1", "name": "MiniMax M2.1", "scale": "large"}, ] # Default test parameters - quick but representative @@ -1066,18 +1095,35 @@ async def rl_test_inference( # Build the process command using Atropos's built-in CLI # This runs the environment's actual code with OpenRouter as the inference backend + # We pass our locked settings + test-specific overrides via CLI args cmd = [ - "python", env_info.file_path, "process", + sys.executable, env_info.file_path, "process", + # Test-specific overrides "--env.total_steps", str(num_steps), "--env.group_size", str(group_size), - "--env.use_wandb", "false", + "--env.use_wandb", "false", # No wandb for quick tests "--env.data_path_to_save_groups", str(output_file), + # Use locked settings from our config + "--env.tokenizer_name", LOCKED_FIELDS["env"]["tokenizer_name"], + "--env.max_token_length", str(LOCKED_FIELDS["env"]["max_token_length"]), + "--env.max_num_workers", str(LOCKED_FIELDS["env"]["max_num_workers"]), + "--env.max_batches_offpolicy", str(LOCKED_FIELDS["env"]["max_batches_offpolicy"]), + # OpenRouter config for inference testing + # IMPORTANT: Use server_type=openai for OpenRouter (not sglang) + # sglang is only for actual training with Tinker's inference server "--openai.base_url", "https://openrouter.ai/api/v1", "--openai.api_key", api_key, "--openai.model_name", model_id, + "--openai.server_type", "openai", # OpenRouter is OpenAI-compatible + "--openai.health_check", "false", # OpenRouter doesn't have health endpoint ] - print(f"Running: python {Path(env_info.file_path).name} process ...") + # Debug: Print the full command + cmd_str = " ".join(str(c) for c in cmd) + # Hide API key in printed output + cmd_display = cmd_str.replace(api_key, "***API_KEY***") + print(f"Command: {cmd_display}") + print(f"Working dir: {TINKER_ATROPOS_ROOT}") print(f" {num_steps} steps × {group_size} completions = {total_rollouts_per_model} rollouts") model_results = { @@ -1105,12 +1151,44 @@ async def rl_test_inference( timeout=600, # 10 minute timeout per model ) + # Decode output + stdout_text = stdout.decode() if stdout else "" + stderr_text = stderr.decode() if stderr else "" + + # Write logs to files for inspection outside CLI + log_file = test_output_dir / f"test_{_current_env}_{model_safe_name}.log" + with open(log_file, "w") as f: + f.write(f"Command: {cmd_display}\n") + f.write(f"Working dir: {TINKER_ATROPOS_ROOT}\n") + f.write(f"Return code: {process.returncode}\n") + f.write(f"\n{'='*60}\n") + f.write(f"STDOUT:\n{'='*60}\n") + f.write(stdout_text or "(empty)\n") + f.write(f"\n{'='*60}\n") + f.write(f"STDERR:\n{'='*60}\n") + f.write(stderr_text or "(empty)\n") + + print(f" Log file: {log_file}") + + # Print to console for immediate debugging + if stdout_text.strip(): + print(f"\n--- STDOUT ---") + print(stdout_text[-2000:]) # Last 2000 chars + + if stderr_text.strip(): + print(f"\n--- STDERR ---") + print(stderr_text[-2000:]) # Last 2000 chars + if process.returncode != 0: model_results["error"] = f"Process exited with code {process.returncode}" - model_results["stderr"] = stderr.decode()[-1000:] - print(f" Error: {model_results['error']}") + model_results["stderr"] = stderr_text[-1000:] + model_results["stdout"] = stdout_text[-1000:] + model_results["log_file"] = str(log_file) + print(f"\n ❌ Error: {model_results['error']}") else: - print(f" Process completed successfully") + print(f"\n ✅ Process completed successfully") + print(f" Output file: {output_file}") + print(f" File exists: {output_file.exists()}") # Parse the output JSONL file if output_file.exists():