update webtools
This commit is contained in:
@@ -3,4 +3,4 @@ openai
|
||||
fal-client
|
||||
python-dotenv
|
||||
fire
|
||||
requests
|
||||
httpx
|
||||
12
run_datagen_megascience.sh
Executable file
12
run_datagen_megascience.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
python batch_runner.py \
|
||||
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
|
||||
--batch_size=10 \
|
||||
--run_name="megascience_eval_gpt5_2" \
|
||||
--distribution="science" \
|
||||
--model="gpt-5" \
|
||||
--base_url="https://api.openai.com/v1" \
|
||||
--api_key="${OPENAI_API_KEY}" \
|
||||
--num_workers=5 \
|
||||
--max_turns=30 \
|
||||
--verbose \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should not be confident in your own reasoning, knowledge, or calculations without using a tool to verify or validate your work."
|
||||
12
run_datagen_megascience_glm4-6.sh
Executable file
12
run_datagen_megascience_glm4-6.sh
Executable file
@@ -0,0 +1,12 @@
|
||||
python batch_runner.py \
|
||||
--dataset_file="hermes-agent-megascience-data/hermes_agent_megascience_eval.jsonl" \
|
||||
--batch_size=10 \
|
||||
--run_name="megascience_eval_glm4-6-fixedterminal" \
|
||||
--distribution="science" \
|
||||
--model="z-ai/glm-4.6" \
|
||||
--base_url="https://openrouter.ai/api/v1" \
|
||||
--api_key="${OPENROUTER_API_KEY}" \
|
||||
--num_workers=5 \
|
||||
--max_turns=30 \
|
||||
--verbose \
|
||||
--ephemeral_system_prompt="You have access to a variety of tools to help you solve scientific, math, and technology problems presented to you. You can use them in sequence and build off of the results of prior tools you've used results. Always use a tool if it can provide additional context, verify formulas, double check concepts and recent studies and understanding, doing all calculations, etc. You should only be confident in your own reasoning, knowledge, or calculations if you've exhaustively used all tools available to you to that can help you verify or validate your work."
|
||||
File diff suppressed because it is too large
Load Diff
@@ -61,7 +61,19 @@ DISTRIBUTIONS = {
|
||||
"terminal": 10 # 10% chance of terminal tools
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
# Scientific problem solving focused distribution
|
||||
"science": {
|
||||
"description": "Web research with vision analysis and reasoning",
|
||||
"toolsets": {
|
||||
"web": 94, # 90% chance of web tools
|
||||
"vision": 50, # 50% chance of vision tools
|
||||
"moa": 10, # 40% chance of reasoning tools
|
||||
"terminal": 94, # 10% chance of terminal tools
|
||||
"image_gen": 15 # 80% chance of image generation tools
|
||||
}
|
||||
},
|
||||
|
||||
# Development-focused distribution
|
||||
"development": {
|
||||
"description": "Terminal and reasoning with occasional web lookup",
|
||||
|
||||
Reference in New Issue
Block a user