- Introduced mini_swe_runner.py for executing tasks using mini-swe-agent environments (local, Docker, Modal) and outputting trajectories in Hermes format. - Implemented trajectory_compressor.py to post-process agent trajectories, compressing them within a target token budget while preserving essential content. - Added trajectory_compression.yaml configuration file for customizable compression settings. - Created sample_and_compress.py script to download, sample, and compress trajectories from HuggingFace datasets. - Enhanced logging and error handling across new modules for improved usability and debugging.
705 lines · 26 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Mini-SWE-Agent Runner with Hermes Trajectory Format
|
|
|
|
This module provides a runner that uses mini-swe-agent's execution environments
|
|
(local, docker, modal) but outputs trajectories in the Hermes-Agent format
|
|
compatible with batch_runner.py and trajectory_compressor.py.
|
|
|
|
Features:
|
|
- Uses mini-swe-agent's Docker, Modal, or Local environments for command execution
|
|
- Outputs trajectories in Hermes format (from/value pairs with <tool_call>/<tool_response> XML)
|
|
- Compatible with the trajectory compression pipeline
|
|
- Supports batch processing from JSONL prompt files
|
|
|
|
Usage:
|
|
# Run a single task with local environment
|
|
python mini_swe_runner.py --task "Create a hello world Python script" --env local
|
|
|
|
# Run with Docker
|
|
python mini_swe_runner.py --task "List files in /tmp" --env docker --image python:3.11-slim
|
|
|
|
# Run with Modal (cloud)
|
|
python mini_swe_runner.py --task "Install numpy and test it" --env modal --image python:3.11-slim
|
|
|
|
# Batch mode from JSONL file
|
|
python mini_swe_runner.py --prompts_file prompts.jsonl --output_file trajectories.jsonl --env docker
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
import time
|
|
import uuid
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import List, Dict, Any, Optional, Literal
|
|
|
|
import fire
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables from a local .env file (API keys, base URLs).
load_dotenv()

# Allow running against a vendored mini-swe-agent checkout when the package
# is not pip-installed: prepend its src/ directory to the import path.
mini_swe_path = Path(__file__).parent / "mini-swe-agent" / "src"
if mini_swe_path.exists():
    sys.path.insert(0, str(mini_swe_path))
|
|
# ============================================================================
# Terminal Tool Definition (matches Hermes-Agent format)
# ============================================================================

# OpenAI function-calling schema for the single "terminal" tool exposed to
# the model. The description text is part of the prompt the model sees, so it
# is kept verbatim; the MINI_SWE_AGENT_FINAL_OUTPUT sentinel it mentions is
# what run_task() scans command output for to detect task completion.
TERMINAL_TOOL_DEFINITION = {
    "type": "function",
    "function": {
        "name": "terminal",
        "description": """Execute bash commands in a sandboxed environment.

**Environment:**
- Isolated execution environment (local, Docker, or Modal cloud)
- Filesystem persists between tool calls within the same task
- Internet access available

**Command Execution:**
- Provide the command to execute via the 'command' parameter
- Optional 'timeout' parameter in seconds (default: 60)

**Examples:**
- Run command: `{"command": "ls -la"}`
- With timeout: `{"command": "long_task.sh", "timeout": 300}`

**Best Practices:**
- Use non-interactive commands (avoid vim, nano, interactive python)
- Pipe to cat if output might be large
- Install tools with apt-get or pip as needed

**Completion:**
- When task is complete, output: echo "MINI_SWE_AGENT_FINAL_OUTPUT" followed by your result
""",
        "parameters": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The bash command to execute"
                },
                "timeout": {
                    "type": "integer",
                    "description": "Command timeout in seconds (default: 60)"
                }
            },
            "required": ["command"]
        }
    }
}
|
|
|
|
|
# ============================================================================
|
|
# Environment Factory
|
|
# ============================================================================
|
|
|
|
def create_environment(
    env_type: str = "local",
    image: str = "python:3.11-slim",
    cwd: str = "/tmp",
    timeout: int = 60,
    **kwargs,
):
    """Build a mini-swe-agent execution environment.

    Backend imports are deferred into each branch so that only the backend
    actually requested needs to be installed.

    Args:
        env_type: One of "local", "docker", "modal".
        image: Docker/Modal image name (ignored for local).
        cwd: Working directory for executed commands.
        timeout: Default per-command timeout in seconds.
        **kwargs: Extra backend-specific options (not passed to local).

    Returns:
        An environment instance exposing an ``execute()`` method.

    Raises:
        ValueError: If ``env_type`` is not one of the supported backends.
    """
    if env_type == "local":
        from minisweagent.environments.local import LocalEnvironment

        return LocalEnvironment(cwd=cwd, timeout=timeout)

    if env_type == "docker":
        from minisweagent.environments.docker import DockerEnvironment

        return DockerEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)

    if env_type == "modal":
        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment

        return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)

    raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', or 'modal'")
|
|
|
|
|
|
# ============================================================================
|
|
# Mini-SWE Runner with Hermes Trajectory Format
|
|
# ============================================================================
|
|
|
|
class MiniSWERunner:
    """
    Agent runner that uses mini-swe-agent environments but outputs
    trajectories in Hermes-Agent format.

    One execution environment is created per task in :meth:`run_task` and
    torn down in its ``finally`` block, so tasks are isolated from each
    other even in batch mode.
    """

    def __init__(
        self,
        model: str = "claude-sonnet-4-20250514",
        base_url: str = None,
        api_key: str = None,
        env_type: str = "local",
        image: str = "python:3.11-slim",
        cwd: str = "/tmp",
        max_iterations: int = 15,
        command_timeout: int = 60,
        verbose: bool = False,
    ):
        """
        Initialize the Mini-SWE Runner.

        Args:
            model: Model name for OpenAI-compatible API
            base_url: API base URL (optional, uses env vars if not provided)
            api_key: API key (optional, uses env vars if not provided)
            env_type: Environment type - "local", "docker", or "modal"
            image: Docker/Modal image (ignored for local)
            cwd: Working directory for commands
            max_iterations: Maximum tool-calling iterations
            command_timeout: Default timeout for commands
            verbose: Enable verbose logging
        """
        self.model = model
        self.max_iterations = max_iterations
        self.command_timeout = command_timeout
        self.verbose = verbose
        self.env_type = env_type
        self.image = image
        self.cwd = cwd

        # Setup logging (NOTE(review): basicConfig in __init__ affects the
        # whole process's root logger, not just this instance)
        logging.basicConfig(
            level=logging.DEBUG if verbose else logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%H:%M:%S'
        )
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client (imported here so the module can be
        # imported without the openai package installed)
        from openai import OpenAI

        client_kwargs = {}
        if base_url:
            client_kwargs["base_url"] = base_url

        # Handle API key with fallbacks:
        # explicit arg > OPENROUTER_API_KEY > ANTHROPIC_API_KEY > OPENAI_API_KEY
        if api_key:
            client_kwargs["api_key"] = api_key
        else:
            client_kwargs["api_key"] = os.getenv(
                "OPENROUTER_API_KEY",
                os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
            )

        self.client = OpenAI(**client_kwargs)

        # Environment will be created lazily, per-task (see run_task)
        self.env = None

        # Tool schema advertised to the model on every API call
        self.tools = [TERMINAL_TOOL_DEFINITION]

        print(f"🤖 Mini-SWE Runner initialized")
        print(f"   Model: {self.model}")
        print(f"   Environment: {self.env_type}")
        if self.env_type != "local":
            print(f"   Image: {self.image}")
        print(f"   Max iterations: {self.max_iterations}")

    def _create_env(self):
        """Create the execution environment from the configured backend."""
        print(f"🔧 Creating {self.env_type} environment...")
        self.env = create_environment(
            env_type=self.env_type,
            image=self.image,
            cwd=self.cwd,
            timeout=self.command_timeout
        )
        print(f"✅ Environment ready")

    def _cleanup_env(self):
        """Cleanup the execution environment.

        Backends differ in their teardown API, so try ``cleanup()`` first
        and fall back to ``stop()``; either way the handle is dropped.
        """
        if self.env is not None:
            if hasattr(self.env, 'cleanup'):
                self.env.cleanup()
            elif hasattr(self.env, 'stop'):
                self.env.stop()
            self.env = None

    def _execute_command(self, command: str, timeout: int = None) -> Dict[str, Any]:
        """
        Execute a command in the environment, creating it on first use.

        Args:
            command: Bash command to execute
            timeout: Optional timeout override (falls back to command_timeout)

        Returns:
            Dict with 'output', 'exit_code' and 'error' keys. On failure the
            exit code is -1 and 'error' carries the exception text, so the
            agent loop never crashes on an environment error.
        """
        if self.env is None:
            self._create_env()

        try:
            result = self.env.execute(command, timeout=timeout or self.command_timeout)
            return {
                "output": result.get("output", ""),
                "exit_code": result.get("returncode", 0),
                "error": None
            }
        except Exception as e:
            # Surface environment failures as a structured result instead of
            # propagating — the model gets to see the error text.
            return {
                "output": "",
                "exit_code": -1,
                "error": str(e)
            }

    def _format_tools_for_system_message(self) -> str:
        """Format tool definitions as a JSON string for the system message."""
        formatted_tools = []
        for tool in self.tools:
            func = tool["function"]
            formatted_tools.append({
                "name": func["name"],
                "description": func.get("description", ""),
                "parameters": func.get("parameters", {}),
                # Hermes tool listing carries a top-level "required" slot;
                # kept as None here (the schema's own "required" lives in
                # "parameters").
                "required": None
            })
        return json.dumps(formatted_tools, ensure_ascii=False)

    def _convert_to_hermes_format(
        self,
        messages: List[Dict[str, Any]],
        user_query: str,
        completed: bool
    ) -> List[Dict[str, Any]]:
        """
        Convert internal OpenAI-style message history to Hermes trajectory
        format (list of {"from": ..., "value": ...} turns).

        This produces the exact format used by batch_runner.py. Note that
        ``completed`` is currently unused here; it is kept for signature
        compatibility with callers.
        """
        trajectory = []

        # System message with tool definitions (runtime prompt text — kept
        # verbatim, including the single-quoted pseudo-JSON example).
        system_msg = (
            "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. "
            "You may call one or more functions to assist with the user query. If available tools are not relevant in assisting "
            "with user query, just respond in natural conversational language. Don't make assumptions about what values to plug "
            "into functions. After calling & executing the functions, you will be provided with function results within "
            "<tool_response> </tool_response> XML tags. Here are the available tools:\n"
            f"<tools>\n{self._format_tools_for_system_message()}\n</tools>\n"
            "For each function call return a JSON object, with the following pydantic model json schema for each:\n"
            "{'title': 'FunctionCall', 'type': 'object', 'properties': {'name': {'title': 'Name', 'type': 'string'}, "
            "'arguments': {'title': 'Arguments', 'type': 'object'}}, 'required': ['name', 'arguments']}\n"
            "Each function call should be enclosed within <tool_call> </tool_call> XML tags.\n"
            "Example:\n<tool_call>\n{'name': <function-name>,'arguments': <args-dict>}\n</tool_call>"
        )

        trajectory.append({"from": "system", "value": system_msg})
        trajectory.append({"from": "human", "value": user_query})

        # Process messages (skip first user message as we already added it)
        i = 1
        while i < len(messages):
            msg = messages[i]

            if msg["role"] == "assistant":
                if "tool_calls" in msg and msg["tool_calls"]:
                    # Assistant message with tool calls
                    content = ""

                    # Add reasoning if present, wrapped in <think> tags
                    if msg.get("reasoning"):
                        content = f"<think>{msg['reasoning']}</think>"

                    if msg.get("content"):
                        content += msg["content"] + "\n"

                    # Add tool calls in XML format
                    for tool_call in msg["tool_calls"]:
                        try:
                            # Arguments may arrive as a JSON string (API) or
                            # an already-parsed dict.
                            arguments = json.loads(tool_call["function"]["arguments"]) \
                                if isinstance(tool_call["function"]["arguments"], str) \
                                else tool_call["function"]["arguments"]
                        except json.JSONDecodeError:
                            arguments = {}

                        tool_call_json = {
                            "name": tool_call["function"]["name"],
                            "arguments": arguments
                        }
                        content += f"<tool_call>\n{json.dumps(tool_call_json, ensure_ascii=False)}\n</tool_call>\n"

                    trajectory.append({"from": "gpt", "value": content.rstrip()})

                    # Collect the run of "tool" messages that follows this
                    # assistant message; they are merged into one Hermes
                    # "tool" turn.
                    tool_responses = []
                    j = i + 1
                    while j < len(messages) and messages[j]["role"] == "tool":
                        tool_msg = messages[j]
                        tool_content = tool_msg["content"]

                        # Try to parse as JSON so the payload nests cleanly
                        # instead of being double-encoded.
                        try:
                            if tool_content.strip().startswith(("{", "[")):
                                tool_content = json.loads(tool_content)
                        except (json.JSONDecodeError, AttributeError):
                            pass

                        tool_response = f"<tool_response>\n"
                        tool_response += json.dumps({
                            "tool_call_id": tool_msg.get("tool_call_id", ""),
                            # Pair each response with the tool call at the
                            # same position; fall back to "unknown" if there
                            # are more responses than calls.
                            "name": msg["tool_calls"][len(tool_responses)]["function"]["name"] \
                                if len(tool_responses) < len(msg["tool_calls"]) else "unknown",
                            "content": tool_content
                        }, ensure_ascii=False)
                        tool_response += "\n</tool_response>"
                        tool_responses.append(tool_response)
                        j += 1

                    if tool_responses:
                        trajectory.append({"from": "tool", "value": "\n".join(tool_responses)})
                    # Skip over the consumed tool messages (the trailing
                    # i += 1 lands on index j).
                    i = j - 1

                else:
                    # Regular assistant message (no tool calls)
                    content = ""
                    if msg.get("reasoning"):
                        content = f"<think>{msg['reasoning']}</think>"
                    content += msg.get("content") or ""
                    trajectory.append({"from": "gpt", "value": content})

            elif msg["role"] == "user":
                trajectory.append({"from": "human", "value": msg["content"]})

            i += 1

        return trajectory

    def run_task(self, task: str) -> Dict[str, Any]:
        """
        Run a single task and return the result with trajectory.

        Drives the model/tool loop: each iteration makes one chat-completions
        call; tool calls are executed in the environment and their results
        appended as "tool" messages. The loop ends when the model stops
        calling tools, when the MINI_SWE_AGENT_FINAL_OUTPUT sentinel appears
        in command output, or when max_iterations is reached.

        Args:
            task: The task/prompt to execute

        Returns:
            Dict with 'conversations' (Hermes trajectory), 'completed',
            'api_calls', and 'metadata'.
        """
        print(f"\n{'='*60}")
        print(f"📝 Task: {task[:80]}{'...' if len(task) > 80 else ''}")
        print(f"{'='*60}")

        # Initialize environment
        self._create_env()

        # Message history
        messages = [{"role": "user", "content": task}]

        # System prompt for the LLM (ephemeral - not saved to trajectory;
        # _convert_to_hermes_format writes its own system turn)
        system_prompt = """You are an AI agent that can execute bash commands to complete tasks.

When you need to run commands, use the 'terminal' tool with your bash command.

**Important:**
- When you have completed the task successfully, run: echo "MINI_SWE_AGENT_FINAL_OUTPUT" followed by a summary
- Be concise and efficient in your approach
- Install any needed tools with apt-get or pip
- Avoid interactive commands (no vim, nano, less, etc.)

Complete the user's task step by step."""

        api_call_count = 0
        completed = False
        final_response = None

        try:
            while api_call_count < self.max_iterations:
                api_call_count += 1
                print(f"\n🔄 API call #{api_call_count}/{self.max_iterations}")

                # Prepare API messages
                api_messages = [{"role": "system", "content": system_prompt}] + messages

                # Make API call; on failure, stop the loop and return what
                # we have so far rather than raising.
                try:
                    response = self.client.chat.completions.create(
                        model=self.model,
                        messages=api_messages,
                        tools=self.tools,
                        timeout=300.0
                    )
                except Exception as e:
                    self.logger.error(f"API call failed: {e}")
                    break

                assistant_message = response.choices[0].message

                # Log assistant response
                if assistant_message.content:
                    print(f"🤖 Assistant: {assistant_message.content[:100]}...")

                # Check for tool calls
                if assistant_message.tool_calls:
                    print(f"🔧 Tool calls: {len(assistant_message.tool_calls)}")

                    # Add assistant message with tool calls (converted from
                    # SDK objects to plain dicts for the trajectory)
                    messages.append({
                        "role": "assistant",
                        "content": assistant_message.content,
                        "tool_calls": [
                            {
                                "id": tc.id,
                                "type": tc.type,
                                "function": {
                                    "name": tc.function.name,
                                    "arguments": tc.function.arguments
                                }
                            }
                            for tc in assistant_message.tool_calls
                        ]
                    })

                    # Execute each tool call
                    for tc in assistant_message.tool_calls:
                        try:
                            args = json.loads(tc.function.arguments)
                        except json.JSONDecodeError:
                            args = {}

                        command = args.get("command", "echo 'No command provided'")
                        timeout = args.get("timeout", self.command_timeout)

                        print(f"   📞 terminal: {command[:60]}...")

                        # Execute command
                        result = self._execute_command(command, timeout)

                        # Format result as the JSON payload stored in the
                        # tool message
                        result_json = json.dumps({
                            "content": {
                                "output": result["output"],
                                "exit_code": result["exit_code"],
                                "error": result["error"]
                            }
                        }, ensure_ascii=False)

                        # Check for task completion signal
                        if "MINI_SWE_AGENT_FINAL_OUTPUT" in result["output"]:
                            print(f"   ✅ Task completion signal detected!")
                            completed = True

                        # Add tool response
                        messages.append({
                            "role": "tool",
                            "content": result_json,
                            "tool_call_id": tc.id
                        })

                        print(f"   ✅ exit_code={result['exit_code']}, output={len(result['output'])} chars")

                    # If task completed, we can stop
                    if completed:
                        final_response = assistant_message.content
                        break

                else:
                    # No tool calls - treat as the final response
                    final_response = assistant_message.content or ""
                    messages.append({
                        "role": "assistant",
                        "content": final_response
                    })
                    completed = True
                    print(f"🎉 Agent finished (no more tool calls)")
                    break

            if api_call_count >= self.max_iterations:
                print(f"⚠️ Reached max iterations ({self.max_iterations})")

        finally:
            # Cleanup environment even on errors/interrupts
            self._cleanup_env()

        # Convert to Hermes trajectory format
        trajectory = self._convert_to_hermes_format(messages, task, completed)

        return {
            "conversations": trajectory,
            "completed": completed,
            "api_calls": api_call_count,
            "metadata": {
                "model": self.model,
                "env_type": self.env_type,
                "timestamp": datetime.now().isoformat()
            }
        }

    def run_batch(
        self,
        prompts: List[str],
        output_file: str
    ) -> List[Dict[str, Any]]:
        """
        Run multiple tasks and save trajectories to a JSONL file.

        Each result is written and flushed immediately so partial progress
        survives a crash; per-task failures are recorded as placeholder rows
        and do not abort the batch.

        Args:
            prompts: List of task prompts
            output_file: Output JSONL file path

        Returns:
            List of results (one per prompt, including error placeholders)
        """
        results = []

        print(f"\n📦 Running batch of {len(prompts)} tasks")
        print(f"📁 Output: {output_file}")

        with open(output_file, 'w', encoding='utf-8') as f:
            for i, prompt in enumerate(prompts, 1):
                print(f"\n{'='*60}")
                print(f"📋 Task {i}/{len(prompts)}")
                print(f"{'='*60}")

                try:
                    result = self.run_task(prompt)
                    results.append(result)

                    # Write to file immediately
                    f.write(json.dumps(result, ensure_ascii=False) + "\n")
                    f.flush()

                    print(f"✅ Task {i} completed (api_calls={result['api_calls']})")

                except Exception as e:
                    # Keep going: log and emit an error placeholder row
                    self.logger.error(f"Error on task {i}: {e}")
                    error_result = {
                        "conversations": [],
                        "completed": False,
                        "api_calls": 0,
                        "error": str(e),
                        "metadata": {"timestamp": datetime.now().isoformat()}
                    }
                    results.append(error_result)
                    f.write(json.dumps(error_result, ensure_ascii=False) + "\n")
                    f.flush()

        print(f"\n✅ Batch complete! {len(results)} trajectories saved to {output_file}")
        return results
|
|
|
|
def run_batch(
|
|
self,
|
|
prompts: List[str],
|
|
output_file: str
|
|
) -> List[Dict[str, Any]]:
|
|
"""
|
|
Run multiple tasks and save trajectories to a JSONL file.
|
|
|
|
Args:
|
|
prompts: List of task prompts
|
|
output_file: Output JSONL file path
|
|
|
|
Returns:
|
|
List of results
|
|
"""
|
|
results = []
|
|
|
|
print(f"\n📦 Running batch of {len(prompts)} tasks")
|
|
print(f"📁 Output: {output_file}")
|
|
|
|
with open(output_file, 'w', encoding='utf-8') as f:
|
|
for i, prompt in enumerate(prompts, 1):
|
|
print(f"\n{'='*60}")
|
|
print(f"📋 Task {i}/{len(prompts)}")
|
|
print(f"{'='*60}")
|
|
|
|
try:
|
|
result = self.run_task(prompt)
|
|
results.append(result)
|
|
|
|
# Write to file immediately
|
|
f.write(json.dumps(result, ensure_ascii=False) + "\n")
|
|
f.flush()
|
|
|
|
print(f"✅ Task {i} completed (api_calls={result['api_calls']})")
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error on task {i}: {e}")
|
|
error_result = {
|
|
"conversations": [],
|
|
"completed": False,
|
|
"api_calls": 0,
|
|
"error": str(e),
|
|
"metadata": {"timestamp": datetime.now().isoformat()}
|
|
}
|
|
results.append(error_result)
|
|
f.write(json.dumps(error_result, ensure_ascii=False) + "\n")
|
|
f.flush()
|
|
|
|
print(f"\n✅ Batch complete! {len(results)} trajectories saved to {output_file}")
|
|
return results
|
|
|
|
|
|
# ============================================================================
|
|
# CLI Interface
|
|
# ============================================================================
|
|
|
|
def main(
    task: str = None,
    prompts_file: str = None,
    output_file: str = "mini-swe-agent-test1.jsonl",
    model: str = "claude-sonnet-4-20250514",
    base_url: str = None,
    api_key: str = None,
    env: str = "local",
    image: str = "python:3.11-slim",
    cwd: str = "/tmp",
    max_iterations: int = 15,
    timeout: int = 60,
    verbose: bool = False,
):
    """
    Run mini-swe-agent tasks with Hermes trajectory format output.

    Args:
        task: Single task to run (use this OR prompts_file)
        prompts_file: JSONL file with prompts (each line: {"prompt": "..."});
            non-JSON lines are treated as plain-text prompts
        output_file: Output JSONL file for trajectories
        model: Model name (default: claude-sonnet-4-20250514)
        base_url: API base URL (optional)
        api_key: API key (optional, uses env vars)
        env: Environment type - "local", "docker", or "modal"
        image: Docker/Modal image (default: python:3.11-slim)
        cwd: Working directory (default: /tmp)
        max_iterations: Maximum tool-calling iterations (default: 15)
        timeout: Command timeout in seconds (default: 60)
        verbose: Enable verbose logging

    Examples:
        # Single task with local environment
        python mini_swe_runner.py --task "Create hello.py that prints Hello World"

        # Single task with Docker
        python mini_swe_runner.py --task "List files" --env docker

        # Batch from file
        python mini_swe_runner.py --prompts_file tasks.jsonl --output_file results.jsonl
    """
    print("🚀 Mini-SWE Runner with Hermes Trajectory Format")
    print("=" * 60)

    # Initialize runner
    runner = MiniSWERunner(
        model=model,
        base_url=base_url,
        api_key=api_key,
        env_type=env,
        image=image,
        cwd=cwd,
        max_iterations=max_iterations,
        command_timeout=timeout,
        verbose=verbose,
    )

    if task:
        # Single task mode
        result = runner.run_task(task)

        # Save to file
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(json.dumps(result, ensure_ascii=False) + "\n")

        print(f"\n📁 Trajectory saved to: {output_file}")
        print(f"✅ Completed: {result['completed']}")
        print(f"📞 API calls: {result['api_calls']}")
        print(f"💬 Turns: {len(result['conversations'])}")

    elif prompts_file:
        # Batch mode: parse one prompt per non-blank line. JSON lines use
        # the "prompt" (or "task") field; anything else is a raw prompt.
        prompts = []
        skipped = 0
        with open(prompts_file, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entry = json.loads(line)
                    prompt = entry.get("prompt", entry.get("task", ""))
                except json.JSONDecodeError:
                    prompt = line
                # Fix: previously, JSON entries lacking both "prompt" and
                # "task" produced empty-string prompts that were dispatched
                # as real tasks. Skip them with a warning instead.
                if prompt:
                    prompts.append(prompt)
                else:
                    skipped += 1

        if skipped:
            print(f"⚠️ Skipped {skipped} entries with no 'prompt'/'task' field")

        if not prompts:
            print(f"❌ No prompts found in {prompts_file}")
            return

        runner.run_batch(prompts, output_file)

    else:
        print("❌ Please provide either --task or --prompts_file")
        print("   Example: python mini_swe_runner.py --task 'Create a hello world script'")
|
|
|
|
|
|
# CLI entry point: Fire maps command-line flags onto main()'s parameters.
if __name__ == "__main__":
    fire.Fire(main)
|