update webtools

This commit is contained in:
teknium
2025-11-02 06:03:21 +00:00
parent d4544f08c5
commit f6f75cbe2b
5 changed files with 658 additions and 622 deletions

View File

@@ -3,4 +3,4 @@ openai
fal-client
python-dotenv
fire
requests
httpx

12
run_datagen_megascience.sh Executable file
View File

@@ -0,0 +1,12 @@
python batch_runner.py \
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
--batch_size=10 \
--run_name="megascience_eval_gpt5_2" \
--distribution="science" \
--model="gpt-5" \
--base_url="https://api.openai.com/v1" \
--api_key="${OPENAI_API_KEY}" \
--num_workers=5 \
--max_turns=30 \
--verbose \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should not be confident in your own reasoning, knowledge, or calculations without using a tool to verify or validate your work."

View File

@@ -0,0 +1,12 @@
python batch_runner.py \
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
--batch_size=10 \
--run_name="megascience_eval_glm4-6-fixedterminal" \
--distribution="science" \
--model="z-ai/glm-4.6" \
--base_url="https://openrouter.ai/api/v1" \
--api_key="${OPENROUTER_API_KEY}" \
--num_workers=5 \
--max_turns=30 \
--verbose \
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work."

File diff suppressed because it is too large Load Diff

View File

@@ -61,7 +61,19 @@ DISTRIBUTIONS = {
"terminal": 10 # 10% chance of terminal tools
}
},
# Scientific problem solving focused distribution
"science": {
"description": "Web research with vision analysis and reasoning",
"toolsets": {
"web": 94, # 90% chance of web tools
"vision": 50, # 50% chance of vision tools
"moa": 10, # 40% chance of reasoning tools
"terminal": 94, # 10% chance of terminal tools
"image_gen": 15 # 80% chance of image generation tools
}
},
# Development-focused distribution
"development": {
"description": "Terminal and reasoning with occasional web lookup",