refactor: move model metadata functions to agent/model_metadata.py

- Relocated functions related to model metadata, including fetch_model_metadata, get_model_context_length, estimate_tokens_rough, and estimate_messages_tokens_rough, to agent/model_metadata.py for better organization and maintainability.
- Updated imports in run_agent.py to reflect the new location of these functions.
2026-02-21 22:34:18 -08:00
parent 9123cfb5dd
commit 51b95236f9
1 changed files with 3 additions and 130 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -77,136 +77,9 @@ from agent.trajectory import (
    save_trajectory as _save_trajectory_to_file,
 )

-# =============================================================================
-# Model Context Management  (extracted to agent/model_metadata.py)
-# The functions below are re-imported above; these stubs maintain the
-# module-level names for any internal references that use the unqualified name.
-# =============================================================================
-
-DEFAULT_CONTEXT_LENGTHS = {
-    "anthropic/claude-opus-4": 200000,
-    "anthropic/claude-opus-4.5": 200000,
-    "anthropic/claude-opus-4.6": 200000,
-    "anthropic/claude-sonnet-4": 200000,
-    "anthropic/claude-sonnet-4-20250514": 200000,
-    "anthropic/claude-haiku-4.5": 200000,
-    "openai/gpt-4o": 128000,
-    "openai/gpt-4-turbo": 128000,
-    "openai/gpt-4o-mini": 128000,
-    "google/gemini-2.0-flash": 1048576,
-    "google/gemini-2.5-pro": 1048576,
-    "meta-llama/llama-3.3-70b-instruct": 131072,
-    "deepseek/deepseek-chat-v3": 65536,
-    "qwen/qwen-2.5-72b-instruct": 32768,
-}
-
-
-def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]:
-    """
-    Fetch model metadata from OpenRouter's /api/v1/models endpoint.
-    Results are cached for 1 hour to minimize API calls.
-    
-    Returns:
-        Dict mapping model_id to metadata (context_length, max_completion_tokens, etc.)
-    """
-    global _model_metadata_cache, _model_metadata_cache_time
-    
-    # Return cached data if fresh
-    if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL:
-        return _model_metadata_cache
-    
-    try:
-        response = requests.get(
-            OPENROUTER_MODELS_URL,
-            timeout=10
-        )
-        response.raise_for_status()
-        data = response.json()
-        
-        # Build cache mapping model_id to relevant metadata
-        cache = {}
-        for model in data.get("data", []):
-            model_id = model.get("id", "")
-            cache[model_id] = {
-                "context_length": model.get("context_length", 128000),
-                "max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096),
-                "name": model.get("name", model_id),
-                "pricing": model.get("pricing", {}),
-            }
-            # Also cache by canonical slug if different
-            canonical = model.get("canonical_slug", "")
-            if canonical and canonical != model_id:
-                cache[canonical] = cache[model_id]
-        
-        _model_metadata_cache = cache
-        _model_metadata_cache_time = time.time()
-        
-        logger.debug("Fetched metadata for %s models from OpenRouter", len(cache))
-        
-        return cache
-        
-    except Exception as e:
-        logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}")
-        # Return cached data even if stale, or empty dict
-        return _model_metadata_cache or {}
-
-
-def get_model_context_length(model: str) -> int:
-    """
-    Get the context length for a specific model.
-    
-    Args:
-        model: Model identifier (e.g., "anthropic/claude-sonnet-4")
-        
-    Returns:
-        Context length in tokens (defaults to 128000 if unknown)
-    """
-    # Try to get from OpenRouter API
-    metadata = fetch_model_metadata()
-    if model in metadata:
-        return metadata[model].get("context_length", 128000)
-    
-    # Check default fallbacks (handles partial matches)
-    for default_model, length in DEFAULT_CONTEXT_LENGTHS.items():
-        if default_model in model or model in default_model:
-            return length
-    
-    # Conservative default
-    return 128000
-
-
-def estimate_tokens_rough(text: str) -> int:
-    """
-    Rough token estimate for pre-flight checks (before API call).
-    Uses ~4 chars per token heuristic.
-    
-    For accurate counts, use the `usage.prompt_tokens` from API responses.
-    
-    Args:
-        text: Text to estimate tokens for
-        
-    Returns:
-        Rough estimated token count
-    """
-    if not text:
-        return 0
-    return len(text) // 4
-
-
-def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int:
-    """
-    Rough token estimate for messages (pre-flight check only).
-    
-    For accurate counts, use the `usage.prompt_tokens` from API responses.
-    
-    Args:
-        messages: List of message dicts
-        
-    Returns:
-        Rough estimated token count
-    """
-    total_chars = sum(len(str(msg)) for msg in messages)
-    return total_chars // 4
+# Model metadata functions (fetch_model_metadata, get_model_context_length,
+# estimate_tokens_rough, estimate_messages_tokens_rough) are now in
+# agent/model_metadata.py -- imported above.


 class ContextCompressor: