refactor: move model metadata functions to agent/model_metadata.py
- Relocated the model metadata functions (fetch_model_metadata, get_model_context_length, estimate_tokens_rough, and estimate_messages_tokens_rough) to agent/model_metadata.py for better organization and maintainability.
- Updated the imports in run_agent.py to reflect the new location of these functions.
This commit is contained in:
133
run_agent.py
133
run_agent.py
@@ -77,136 +77,9 @@ from agent.trajectory import (
|
||||
save_trajectory as _save_trajectory_to_file,
|
||||
)
|
||||
|
||||
# =============================================================================
|
||||
# Model Context Management (extracted to agent/model_metadata.py)
|
||||
# The functions below are re-imported above; these stubs maintain the
|
||||
# module-level names for any internal references that use the unqualified name.
|
||||
# =============================================================================
|
||||
|
||||
# Fallback context-window sizes (in tokens), keyed by model identifier.
# Insertion order is significant: the substring lookup that consumes this
# table returns the first entry that matches, so do not reorder casually.
DEFAULT_CONTEXT_LENGTHS = {
    # Anthropic
    "anthropic/claude-opus-4": 200_000,
    "anthropic/claude-opus-4.5": 200_000,
    "anthropic/claude-opus-4.6": 200_000,
    "anthropic/claude-sonnet-4": 200_000,
    "anthropic/claude-sonnet-4-20250514": 200_000,
    "anthropic/claude-haiku-4.5": 200_000,
    # OpenAI
    "openai/gpt-4o": 128_000,
    "openai/gpt-4-turbo": 128_000,
    "openai/gpt-4o-mini": 128_000,
    # Google
    "google/gemini-2.0-flash": 1_048_576,
    "google/gemini-2.5-pro": 1_048_576,
    # Other providers
    "meta-llama/llama-3.3-70b-instruct": 131_072,
    "deepseek/deepseek-chat-v3": 65_536,
    "qwen/qwen-2.5-72b-instruct": 32_768,
}
|
||||
|
||||
|
||||
def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
    """
    Fetch model metadata from OpenRouter's /api/v1/models endpoint.

    Results are kept in a module-level cache and reused while they are younger
    than ``_MODEL_CACHE_TTL`` seconds (documented as 1 hour) to minimize API
    calls.

    Args:
        force_refresh: When True, skip the freshness check and always query
            the endpoint.

    Returns:
        Dict mapping model_id to metadata (context_length,
        max_completion_tokens, name, pricing). Models that report a
        ``canonical_slug`` different from their id are reachable under both
        keys (both keys share the same metadata dict). If the request or
        parsing fails, the previously cached mapping is returned even if
        stale, or an empty dict when nothing has been cached yet.
    """
    global _model_metadata_cache, _model_metadata_cache_time

    # Return cached data if fresh. An empty cache is falsy, so it never counts
    # as fresh and always triggers a new fetch.
    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
        return _model_metadata_cache

    try:
        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
        response.raise_for_status()
        data = response.json()

        # Build cache mapping model_id to relevant metadata
        cache = {}
        for model in data.get("data", []):
            model_id = model.get("id", "")
            # Guard against a present-but-null "top_provider": a plain
            # .get("top_provider", {}) would hand back None, and the resulting
            # AttributeError would throw away the entire response.
            top_provider = model.get("top_provider") or {}
            cache[model_id] = {
                "context_length": model.get("context_length", 128000),
                "max_completion_tokens": top_provider.get("max_completion_tokens", 4096),
                "name": model.get("name", model_id),
                "pricing": model.get("pricing", {}),
            }
            # Also cache by canonical slug if different
            canonical = model.get("canonical_slug", "")
            if canonical and canonical != model_id:
                cache[canonical] = cache[model_id]

        _model_metadata_cache = cache
        _model_metadata_cache_time = time.time()

        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))

        return cache

    except Exception as e:
        # Deliberately broad: this lookup is best-effort and must never take
        # the caller down. Log through the module logger (same one used on the
        # success path) with lazy %-formatting.
        logger.warning("Failed to fetch model metadata from OpenRouter: %s", e)
        # Return cached data even if stale, or empty dict
        return _model_metadata_cache or {}
|
||||
|
||||
|
||||
def get_model_context_length(model: str) -> int:
    """
    Get the context length for a specific model.

    Lookup order:
      1. Exact match in the metadata returned by ``fetch_model_metadata()``.
      2. First entry of ``DEFAULT_CONTEXT_LENGTHS`` whose key contains
         ``model`` or is contained in it (substring match in either
         direction, in table order).
      3. A conservative default of 128000.

    Args:
        model: Model identifier (e.g., "anthropic/claude-sonnet-4")

    Returns:
        Context length in tokens (defaults to 128000 if unknown)
    """
    # Try to get from OpenRouter API
    metadata = fetch_model_metadata()
    if model in metadata:
        # The stored value can be None when the upstream entry carried an
        # explicit null; never hand a non-int back to callers.
        return metadata[model].get("context_length") or 128000

    # Check default fallbacks (handles partial matches). Skip this for an
    # empty/None identifier: "" is a substring of every key, which would
    # otherwise return the first table entry instead of the default.
    if model:
        for default_model, length in DEFAULT_CONTEXT_LENGTHS.items():
            if default_model in model or model in default_model:
                return length

    # Conservative default
    return 128000
|
||||
|
||||
|
||||
def estimate_tokens_rough(text: str) -> int:
    """
    Cheap pre-flight approximation of how many tokens ``text`` occupies.

    Assumes roughly four characters per token. This is only a heuristic for
    checks made before an API call; the authoritative figure is the
    `usage.prompt_tokens` value in API responses.

    Args:
        text: Text to estimate tokens for

    Returns:
        Rough estimated token count (0 for empty or missing text)
    """
    return len(text) // 4 if text else 0
|
||||
|
||||
|
||||
def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
    """
    Cheap pre-flight approximation of the token count of a message list.

    Each message is stringified with ``str()`` and the combined character
    count is divided by four. The authoritative figure is the
    `usage.prompt_tokens` value in API responses.

    Args:
        messages: List of message dicts

    Returns:
        Rough estimated token count
    """
    char_count = 0
    for message in messages:
        char_count += len(str(message))
    return char_count // 4
|
||||
# Model metadata functions (fetch_model_metadata, get_model_context_length,
|
||||
# estimate_tokens_rough, estimate_messages_tokens_rough) are now in
|
||||
# agent/model_metadata.py -- imported above.
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
|
||||
Reference in New Issue
Block a user