From 9ec4f7504be4406297f486d78c8b5eb785506ae9 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Wed, 25 Feb 2026 02:27:41 -0800 Subject: [PATCH] Provide example datagen config scripts --- .gitignore | 1 + configs/run_browser_tasks.sh | 42 ------------ configs/run_datagen_glm4.7-imagen.sh | 26 -------- configs/run_datagen_glm4.7.sh | 26 -------- configs/run_datagen_glm4.7_megascience.sh | 27 -------- configs/run_datagen_glm4.7_raw_tasks.sh | 28 -------- configs/run_datagen_megascience.sh | 12 ---- configs/run_datagen_minimax-3.1.sh | 12 ---- configs/run_eval_glm4.7_newterm.sh | 29 --------- configs/run_eval_terminal.sh | 33 ---------- configs/run_mixed_tasks.sh | 46 ------------- configs/run_terminal_tasks.sh | 50 -------------- configs/test_run.sh | 23 ------- configs/test_skills_kimi.sh | 21 ------ .../example_browser_tasks.jsonl | 5 ++ datagen-config-examples/run_browser_tasks.sh | 65 +++++++++++++++++++ .../trajectory_compression.yaml | 0 17 files changed, 71 insertions(+), 375 deletions(-) delete mode 100755 configs/run_browser_tasks.sh delete mode 100755 configs/run_datagen_glm4.7-imagen.sh delete mode 100755 configs/run_datagen_glm4.7.sh delete mode 100755 configs/run_datagen_glm4.7_megascience.sh delete mode 100755 configs/run_datagen_glm4.7_raw_tasks.sh delete mode 100755 configs/run_datagen_megascience.sh delete mode 100755 configs/run_datagen_minimax-3.1.sh delete mode 100755 configs/run_eval_glm4.7_newterm.sh delete mode 100755 configs/run_eval_terminal.sh delete mode 100755 configs/run_mixed_tasks.sh delete mode 100755 configs/run_terminal_tasks.sh delete mode 100755 configs/test_run.sh delete mode 100644 configs/test_skills_kimi.sh create mode 100644 datagen-config-examples/example_browser_tasks.jsonl create mode 100755 datagen-config-examples/run_browser_tasks.sh rename {configs => datagen-config-examples}/trajectory_compression.yaml (100%) diff --git a/.gitignore b/.gitignore index 8e705d8e..af9d9e75 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ cli-config.yaml # Skills Hub state (lives in ~/.hermes/skills/.hub/ at runtime, but just in case) skills/.hub/ +ignored/ \ No newline at end of file diff --git a/configs/run_browser_tasks.sh b/configs/run_browser_tasks.sh deleted file mode 100755 index 14e7ad2d..00000000 --- a/configs/run_browser_tasks.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -# Browser-focused data generation run -# Uses browser-use-tasks.jsonl (6504 tasks) -# Distribution: browser 97%, web 20%, vision 12%, terminal 15% - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" -echo "🌐 Running browser-focused tasks with browser_tasks distribution" - -python batch_runner.py \ - --dataset_file="browser-use-tasks.jsonl" \ - --batch_size=20 \ - --run_name="browser_tasks" \ - --distribution="browser_tasks" \ - --model="moonshotai/kimi-k2.5" \ - --verbose \ - --base_url="https://openrouter.ai/api/v1" \ - --num_workers=50 \ - --max_turns=60 \ - --resume \ - --ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals. - -IMPORTANT GUIDELINES: - -1. SEARCHING: Do NOT try to search directly on Google or other search engines via the browser - they block automated searches. Instead, ALWAYS use the web_search tool first to find URLs for any pages you need to visit, then use browser tools to navigate to those URLs. - -2. COOKIE/PRIVACY DIALOGS: After navigating to a page, ALWAYS check if there are cookie consent dialogs, privacy popups, or overlay modals blocking the page. These appear in snapshots as 'dialog' elements with buttons like 'Close', 'Accept', 'Accept All', 'Decline', 'I Agree', 'Got it', 'OK', or 'X'. You MUST dismiss these dialogs FIRST by clicking the appropriate button before trying to interact with other page elements. After dismissing a dialog, take a fresh browser_snapshot to get updated element references. - -3. HANDLING TIMEOUTS: If an action times out, it often means the element is blocked by an overlay or the page state has changed. Take a new snapshot to see the current page state and look for any dialogs or popups that need to be dismissed. If there is no dialog box to bypass, then try a new method or report the error to the user and complete the task. - -4. GENERAL: Use browser tools to click elements, fill forms, extract information, and perform web-based tasks. If terminal is available, use it for any local file operations or computations needed to support your web tasks. Be thorough in verifying your actions and handle any errors gracefully by retrying or trying alternative approaches." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" - -# --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ \ No newline at end of file diff --git a/configs/run_datagen_glm4.7-imagen.sh b/configs/run_datagen_glm4.7-imagen.sh deleted file mode 100755 index 6555278d..00000000 --- a/configs/run_datagen_glm4.7-imagen.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate a timestamp for the log file -TIMESTAMP=$(date +%Y%m%d_%H%M%S) -LOG_FILE="logs/imagen_eval_gpt5_${TIMESTAMP}.log" - -echo "📝 Logging output to: $LOG_FILE" - -python batch_runner.py \ - --dataset_file="source-data/hermes-agent-imagen-data/hermes_agent_imagen_train_sft.jsonl" \ - --batch_size=20 \ - --run_name="imagen_train_sft_glm4.7" \ - --distribution="image_gen" \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=50 \ - --max_turns=25 \ - --ephemeral_system_prompt="When generating an image for the user view the image by using the vision_analyze tool to ensure it is what the user wanted. If it isn't feel free to retry a few times. If none are perfect, choose the best option that is the closest match, and explain its imperfections. If the image generation tool fails, try again a few times. If the vision analyze tool fails, provide the image to the user and explain it is your best effort attempt." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" -# --verbose \ \ No newline at end of file diff --git a/configs/run_datagen_glm4.7.sh b/configs/run_datagen_glm4.7.sh deleted file mode 100755 index 6224c481..00000000 --- a/configs/run_datagen_glm4.7.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/glm4.7-thinking-sft1_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" - -python batch_runner.py \ - --dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_sft_2.jsonl" \ - --batch_size=20 \ - --run_name="megascience_glm4.7-thinking-sft2" \ - --distribution="science" \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=15 \ - --max_turns=60 \ - --ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain focused context." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" - -# --verbose \ \ No newline at end of file diff --git a/configs/run_datagen_glm4.7_megascience.sh b/configs/run_datagen_glm4.7_megascience.sh deleted file mode 100755 index 1e56c468..00000000 --- a/configs/run_datagen_glm4.7_megascience.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/glm4.7-thinking-sft1-10k_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" - -python batch_runner.py \ - --dataset_file="source-data/hermes-agent-megascience-data/hermes_agent_megascience_sft_train_1_10k.jsonl" \ - --batch_size=20 \ - --run_name="megascience_glm4.7-thinking-sft1" \ - --distribution="science" \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=50 \ - --max_turns=60 \ - --resume \ - --ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used for furthering results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12, so you can maintain a focused context." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" - -# --verbose \ \ No newline at end of file diff --git a/configs/run_datagen_glm4.7_raw_tasks.sh b/configs/run_datagen_glm4.7_raw_tasks.sh deleted file mode 100755 index 03c6676f..00000000 --- a/configs/run_datagen_glm4.7_raw_tasks.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/glm4.7-terminal-tasks_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" - -python batch_runner.py \ - --dataset_file="source-data/raw_tasks_prompts.jsonl" \ - --batch_size=20 \ - --run_name="terminal-tasks-glm4.7-thinking" \ - --distribution="default" \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=50 \ - --max_turns=60 \ - --ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" - -# --verbose \ -# --resume \ - diff --git a/configs/run_datagen_megascience.sh b/configs/run_datagen_megascience.sh deleted file mode 100755 index da1e8e1f..00000000 --- a/configs/run_datagen_megascience.sh +++ /dev/null @@ -1,12 +0,0 @@ -python batch_runner.py \ - --dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \ - --batch_size=10 \ - --run_name="megascience_eval_gpt5_2" \ - --distribution="science" \ - --model="gpt-5" \ - --base_url="https://api.openai.com/v1" \ - --api_key="${OPENAI_API_KEY}" \ - --num_workers=5 \ - --max_turns=30 \ - --verbose \ - --ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should not be confident in your own reasoning, knowledge, or calculations without using a tool to verify or validate your work." \ No newline at end of file diff --git a/configs/run_datagen_minimax-3.1.sh b/configs/run_datagen_minimax-3.1.sh deleted file mode 100755 index 39f203af..00000000 --- a/configs/run_datagen_minimax-3.1.sh +++ /dev/null @@ -1,12 +0,0 @@ -python batch_runner.py \ - --dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \ - --batch_size=50 \ - --run_name="megascience_sft_minimax-m2.1-thinking-2-eval" \ - --distribution="science" \ - --model="minimax/minimax-m2.1" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="minimax" \ - --num_workers=1 \ - --max_turns=40 \ - --verbose \ - --ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use the terminal or search tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work. Always pip install any packages you need to use the python scripts you want to run. If you need to use a tool that isn't available, you can use the terminal tool to install or create it in many cases as well. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Search for at least 3 sources, but not more than 12." \ No newline at end of file diff --git a/configs/run_eval_glm4.7_newterm.sh b/configs/run_eval_glm4.7_newterm.sh deleted file mode 100755 index 735758b6..00000000 --- a/configs/run_eval_glm4.7_newterm.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/glm4.7-terminal-tasks-newterm_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" - -python batch_runner.py \ - --dataset_file="source-data/hermes-agent-agent-tasks-1/agent_tasks_eval.jsonl" \ - --batch_size=1 \ - --run_name="terminal-tasks-test-newterm" \ - --distribution="terminal_only" \ - --verbose \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=5 \ - --max_turns=60 \ - --ephemeral_system_prompt="You have access to a variety of tools to help you complete coding, system administration, and general computing tasks. You can use them in sequence and build off of the results of prior tools you've used. Always use the terminal tool to execute commands, write code, install packages, and verify your work. You should test and validate everything you create. Always pip install any packages you need (use --break-system-packages if needed). If you need a tool that isn't available, you can use the terminal to install or create it. Do not use the terminal tool to communicate with the user, as they cannot see your commands, only your final response after completing the task. Use web search when you need to look up documentation, APIs, or current best practices." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" - -# --verbose \ -# --resume \ - diff --git a/configs/run_eval_terminal.sh b/configs/run_eval_terminal.sh deleted file mode 100755 index 0cf6a1f6..00000000 --- a/configs/run_eval_terminal.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Terminal-only evaluation run using Modal sandboxes -# Uses 10 sample tasks from nous-terminal-tasks - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/terminal_eval_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" -echo "🔧 Using Modal sandboxes (TERMINAL_ENV=modal)" - -# Set terminal to use Modal -export TERMINAL_ENV=modal -export TERMINAL_MODAL_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 -export TERMINAL_TIMEOUT=300 - -python batch_runner.py \ - --dataset_file="nous-terminal-tasks_eval.jsonl" \ - --batch_size=5 \ - --run_name="terminal_eval" \ - --distribution="terminal_only" \ - --model="z-ai/glm-4.7" \ - --base_url="https://openrouter.ai/api/v1" \ - --providers_allowed="gmicloud,siliconflow,atlas-cloud,z-ai,novita" \ - --num_workers=2 \ - --max_turns=30 \ - --ephemeral_system_prompt="You have access to a terminal tool for executing commands. Use it to complete the task. Install any packages you need with apt-get or pip (use --break-system-packages if needed). Do not use interactive tools (vim, nano, python repl). If git output is large, pipe to cat." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" diff --git a/configs/run_mixed_tasks.sh b/configs/run_mixed_tasks.sh deleted file mode 100755 index 39ad8cf5..00000000 --- a/configs/run_mixed_tasks.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# Mixed browser+terminal data generation run -# Uses mixed-browser-terminal-tasks.jsonl (200 tasks) -# Distribution: browser 92%, terminal 92%, web 35%, vision 15%, image_gen 15% - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/mixed_tasks_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" -echo "🔀 Running mixed browser+terminal tasks with mixed_tasks distribution" - -# Set terminal environment -# SIF images are automatically built/cached by terminal_tool.py -export TERMINAL_ENV=singularity -export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20" -export TERMINAL_TIMEOUT=300 - -# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp) -if [ -d "/scratch" ] && [ -w "/scratch" ]; then - CACHE_BASE="/scratch/$USER/.apptainer" -else - CACHE_BASE="/tmp/$USER/.apptainer" -fi -export APPTAINER_CACHEDIR="$CACHE_BASE" -export APPTAINER_TMPDIR="$CACHE_BASE/tmp" -mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR" - -echo "📁 Apptainer cache: $APPTAINER_CACHEDIR" - -python batch_runner.py \ - --dataset_file="mixed-browser-terminal-tasks.jsonl" \ - --batch_size=20 \ - --run_name="mixed_tasks" \ - --distribution="mixed_tasks" \ - --model="moonshotai/kimi-k2.5" \ - --base_url="https://openrouter.ai/api/v1" \ - --num_workers=25 \ - --max_turns=60 \ - --ephemeral_system_prompt="You are an AI assistant capable of both browser automation and terminal operations. Use browser tools to navigate websites, interact with web pages, fill forms, and extract information. Use terminal tools to execute commands, write and run code, install packages (use --break-system-packages with pip if needed), and perform local computations. When web search is available, use it to find URLs, documentation, or current information. If vision is available, use it to analyze images or screenshots. If image generation is available, use it when the task requires creating images. Combine browser and terminal capabilities effectively - for example, you might use the browser to fetch data from a website and terminal to process or analyze it. Always verify your work and handle errors gracefully. Whenever you can do something in a terminal instead of a web browser, you should choose to do so, as it's much cheaper." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" diff --git a/configs/run_terminal_tasks.sh b/configs/run_terminal_tasks.sh deleted file mode 100755 index 7ac8a669..00000000 --- a/configs/run_terminal_tasks.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# Terminal-focused data generation run -# Uses nous-terminal-tasks.jsonl (597 tasks) -# Distribution: terminal 97%, web 15%, browser 0%, vision 8%, image_gen 3% - -# Create logs directory if it doesn't exist -mkdir -p logs - -# Generate log filename with timestamp -LOG_FILE="logs/terminal_tasks_$(date +%Y%m%d_%H%M%S).log" - -echo "📝 Logging output to: $LOG_FILE" -echo "💻 Running terminal-focused tasks with terminal_tasks distribution" - -# Set terminal environment -# SIF images are automatically built/cached by terminal_tool.py -export TERMINAL_ENV=singularity -export TERMINAL_SINGULARITY_IMAGE="docker://nikolaik/python-nodejs:python3.11-nodejs20" -export TERMINAL_TIMEOUT=300 - -# Set up Apptainer cache directories (use /scratch if available, otherwise /tmp) -if [ -d "/scratch" ] && [ -w "/scratch" ]; then - CACHE_BASE="/scratch/$USER/.apptainer" -else - CACHE_BASE="/tmp/$USER/.apptainer" -fi -export APPTAINER_CACHEDIR="$CACHE_BASE" -export APPTAINER_TMPDIR="$CACHE_BASE/tmp" -mkdir -p "$APPTAINER_CACHEDIR" "$APPTAINER_TMPDIR" - -echo "📁 Apptainer cache: $APPTAINER_CACHEDIR" -echo "🐳 Image: $TERMINAL_SINGULARITY_IMAGE (auto-converted to SIF on first use)" - -python batch_runner.py \ - --dataset_file="nous-terminal-tasks.jsonl" \ - --batch_size=5 \ - --run_name="terminal_tasks-kimi-k2.5" \ - --distribution="terminal_tasks" \ - --model="moonshotai/kimi-k2.5" \ - --verbose \ - --base_url="https://openrouter.ai/api/v1" \ - --num_workers=80 \ - --max_turns=60 \ - --providers_ignored="Novita" \ - --resume \ - --ephemeral_system_prompt="You have access to a terminal tool for executing commands and completing coding, system administration, and computing tasks. Use the terminal to write code, run scripts, install packages (use --break-system-packages with pip if needed), manipulate files, and verify your work. Always test and validate code you create. Do not use interactive tools like vim, nano, or python REPL. If git output is large, pipe to cat. When web search is available, use it to look up documentation, APIs, or best practices. If browser tools are available, use them for web interactions that require page manipulation. Do not use the terminal to communicate with the user - only your final response will be shown to them." \ - 2>&1 | tee "$LOG_FILE" - -echo "✅ Log saved to: $LOG_FILE" diff --git a/configs/test_run.sh b/configs/test_run.sh deleted file mode 100755 index 66be76d5..00000000 --- a/configs/test_run.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -# Check if a prompt argument was provided -if [ $# -eq 0 ]; then - echo "Error: Please provide a prompt as an argument" - echo "Usage: $0 \"your prompt here\"" - exit 1 -fi - -# Get the prompt from the first argument -PROMPT="$1" - -# Set debug mode for web tools -export WEB_TOOLS_DEBUG=true - -# Run the agent with the provided prompt -python run_agent.py \ - --query "$PROMPT" \ - --max_turns 30 \ - --model claude-sonnet-4-5-20250929 \ - --base_url https://api.anthropic.com/v1/ \ - --api_key $ANTHROPIC_API_KEY \ - --save_trajectories \ No newline at end of file diff --git a/configs/test_skills_kimi.sh b/configs/test_skills_kimi.sh deleted file mode 100644 index f299b476..00000000 --- a/configs/test_skills_kimi.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash - -# Test skills tool with Kimi K2.5 -# Usage: ./configs/test_skills_kimi.sh "your query here" -# Example: ./configs/test_skills_kimi.sh "List available skills and show me the vllm skill" - -# Default query if none provided -QUERY="${1:-List all available skills. Then show me the axolotl skill and view one of its reference files.}" - -echo "🎯 Testing Skills Tool with Kimi K2.5" -echo "📝 Query: $QUERY" -echo "=" - -python run_agent.py \ - --enabled_toolsets=skills \ - --model="moonshotai/kimi-k2.5" \ - --base_url="https://openrouter.ai/api/v1" \ - --max_turns=10 \ - --verbose \ - --save_sample \ - --query="$QUERY" diff --git a/datagen-config-examples/example_browser_tasks.jsonl b/datagen-config-examples/example_browser_tasks.jsonl new file mode 100644 index 00000000..04c2848c --- /dev/null +++ b/datagen-config-examples/example_browser_tasks.jsonl @@ -0,0 +1,5 @@ +{"prompt": "Go to https://news.ycombinator.com and find the top 5 posts on the front page. For each post, get the title, URL, points, and number of comments. Return the results as a formatted summary."} +{"prompt": "Navigate to https://en.wikipedia.org/wiki/Hermes and extract the first paragraph of the article, the image caption, and the list of items in the infobox. Summarize what you find."} +{"prompt": "Go to https://github.com/trending and find the top 3 trending repositories today. For each repo, get the name, description, language, and star count. Write the results to a file called trending_repos.md."} +{"prompt": "Visit https://httpbin.org/forms/post and fill out the form with sample data (customer name: Jane Doe, size: Medium, topping: Bacon, delivery time: 12:00). Submit the form and report what the response page shows."} +{"prompt": "Navigate to https://books.toscrape.com, browse to the Travel category, find the highest-rated book, and extract its title, price, availability, and description."} diff --git a/datagen-config-examples/run_browser_tasks.sh b/datagen-config-examples/run_browser_tasks.sh new file mode 100755 index 00000000..a66e416d --- /dev/null +++ b/datagen-config-examples/run_browser_tasks.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +# ============================================================================= +# Example: Browser-Focused Data Generation +# ============================================================================= +# +# Generates tool-calling trajectories for browser automation tasks. +# The agent navigates websites, fills forms, extracts information, etc. +# +# Distribution: browser 97%, web 20%, vision 12%, terminal 15% +# +# Prerequisites: +# - OPENROUTER_API_KEY in ~/.hermes/.env +# - BROWSERBASE_API_KEY in ~/.hermes/.env (for browser tools) +# - A dataset JSONL file with one {"prompt": "..."} per line +# +# Usage: +# cd ~/.hermes/hermes-agent +# bash datagen-config-examples/run_browser_tasks.sh +# +# Output: data/browser_tasks_example/trajectories.jsonl +# ============================================================================= + +mkdir -p logs + +LOG_FILE="logs/browser_tasks_$(date +%Y%m%d_%H%M%S).log" +echo "📝 Logging to: $LOG_FILE" + +# Point to the example dataset in this directory +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +python batch_runner.py \ + --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \ + --batch_size=5 \ + --run_name="browser_tasks_example" \ + --distribution="browser_tasks" \ + --model="anthropic/claude-sonnet-4" \ + --base_url="https://openrouter.ai/api/v1" \ + --num_workers=3 \ + --max_turns=30 \ + --ephemeral_system_prompt="You are an AI assistant with browser automation capabilities. Your primary task is to navigate and interact with web pages to accomplish user goals. + +IMPORTANT GUIDELINES: + +1. SEARCHING: Do NOT search directly on Google via the browser — they block automated searches. Use the web_search tool first to find URLs, then navigate to them with browser tools. + +2. COOKIE/PRIVACY DIALOGS: After navigating to a page, check for cookie consent or privacy popups. Dismiss them by clicking Accept/Close/OK before interacting with other elements. Take a fresh browser_snapshot afterward. + +3. HANDLING TIMEOUTS: If an action times out, the element may be blocked by an overlay. Take a new snapshot and look for dialogs to dismiss. If none, try an alternative approach or report the issue. + +4. GENERAL: Use browser tools to click, fill forms, and extract information. Use terminal for local file operations. Verify your actions and handle errors gracefully." \ + 2>&1 | tee "$LOG_FILE" + +echo "✅ Done. Log: $LOG_FILE" + +# ============================================================================= +# Common options you can add: +# +# --resume Resume from checkpoint if interrupted +# --verbose Enable detailed logging +# --max_tokens=63000 Set max response tokens +# --reasoning_disabled Disable model thinking/reasoning tokens +# --providers_allowed="anthropic,google" Restrict to specific providers +# --prefill_messages_file="configs/prefill.json" Few-shot priming +# ============================================================================= diff --git a/configs/trajectory_compression.yaml b/datagen-config-examples/trajectory_compression.yaml similarity index 100% rename from configs/trajectory_compression.yaml rename to datagen-config-examples/trajectory_compression.yaml