This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/config/providers.yaml

112 lines
3.4 KiB
YAML
Raw Normal View History

# Cascade LLM Router Configuration
# Providers are tried in priority order (1 = highest)
# On failure, automatically falls back to next provider
cascade:
  # Router-wide timeout, retry, and circuit-breaker settings.

  # Timeout settings
  timeout_seconds: 30

  # Retry settings
  max_retries_per_provider: 2
  retry_delay_seconds: 1

  # Circuit breaker settings
  circuit_breaker:
    failure_threshold: 5  # Open circuit after 5 failures
    recovery_timeout: 60  # Try again after 60 seconds
    half_open_max_calls: 2  # Allow 2 test calls when half-open
providers:
  # One list entry per provider. Each entry carries its own name, type,
  # enabled flag, priority (1 = highest), and a nested `models` list in which
  # one model is flagged `default: true`.

  # Primary: Local Ollama (always try first for sovereignty)
  - name: ollama-local
    type: ollama
    enabled: true
    priority: 1
    url: "http://localhost:11434"
    models:
      - name: llama3.2
        default: true
        context_window: 128000
      - name: deepseek-r1:1.5b
        context_window: 32000

  # Secondary: Local AirLLM (if installed)
  - name: airllm-local
    type: airllm
    enabled: false  # Enable once airllm is installed (pip install airllm)
    priority: 2
    models:
      - name: 70b
        default: true
      - name: 8b
      - name: 405b

  # Tertiary: OpenAI (if API key available)
  - name: openai-backup
    type: openai
    enabled: false  # Enable by setting OPENAI_API_KEY
    priority: 3
    api_key: "${OPENAI_API_KEY}"  # Loaded from environment
    base_url: null  # Use default OpenAI endpoint
    models:
      - name: gpt-4o-mini
        default: true
        context_window: 128000
      - name: gpt-4o
        context_window: 128000

  # Quaternary: Anthropic (if API key available)
  - name: anthropic-backup
    type: anthropic
    enabled: false  # Enable by setting ANTHROPIC_API_KEY
    priority: 4
    api_key: "${ANTHROPIC_API_KEY}"
    models:
      - name: claude-3-haiku-20240307
        default: true
        context_window: 200000
      - name: claude-3-sonnet-20240229
        context_window: 200000
# ── Custom Models ──────────────────────────────────────────────────────
# Register custom model weights for per-agent assignment.
# Supports GGUF (Ollama), safetensors, and HuggingFace checkpoint dirs.
# Models can also be registered at runtime via the /api/v1/models API.
#
# Roles: general (default inference), reward (PRM scoring),
#        teacher (distillation), judge (output evaluation)
#
# Empty by default. To register models here, replace [] with a block list
# of entries shaped like the examples below.
custom_models: []
# Example entries:
#   - name: my-finetuned-llama
#     format: gguf
#     path: /path/to/model.gguf
#     role: general
#     context_window: 8192
#     description: "Fine-tuned Llama for code tasks"
#
#   - name: reward-model
#     format: ollama
#     path: deepseek-r1:1.5b
#     role: reward
#     context_window: 32000
#     description: "Process reward model for scoring outputs"
# ── Agent Model Assignments ─────────────────────────────────────────────
# Map persona agent IDs to specific models.
# Agents without an assignment use the global default (ollama_model).
# Empty by default. To add assignments, replace {} with a block mapping
# shaped like the example below.
agent_model_assignments: {}
# Example:
#   persona-forge: my-finetuned-llama
#   persona-echo: deepseek-r1:1.5b
# Cost tracking (optional, for budget monitoring)
cost_tracking:
  # Budget-monitoring settings; amounts are in US dollars per day.
  enabled: true
  budget_daily_usd: 10.0  # Alert if daily spend exceeds this
  alert_threshold_percent: 80  # Alert at 80% of budget
# Metrics retention
metrics:
  retention_hours: 168  # Keep 7 days of metrics (7 * 24 h)
  # NOTE(review): presumably how often expired metrics are purged — confirm
  # against the consumer.
  purge_interval_hours: 24