forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
@@ -30,25 +30,36 @@ class Settings(BaseSettings):
|
||||
return normalize_ollama_url(self.ollama_url)
|
||||
|
||||
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
||||
# qwen3:30b is the primary model — better reasoning and tool calling
|
||||
# than llama3.1:8b-instruct while still running locally on modest hardware.
|
||||
# Fallback: llama3.1:8b-instruct if qwen3:30b not available.
|
||||
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
||||
ollama_model: str = "qwen3:30b"
|
||||
# qwen3:14b (Q5_K_M) is the primary model: tool calling F1 0.971, ~17.5 GB
|
||||
# at 32K context — optimal for M3 Max 36 GB (Issue #1063).
|
||||
# qwen3:30b exceeded memory budget at 32K+ context on 36 GB hardware.
|
||||
ollama_model: str = "qwen3:14b"
|
||||
|
||||
# Fast routing model — override with OLLAMA_FAST_MODEL
|
||||
# qwen3:8b (Q6_K): tool calling F1 0.933 at ~45-55 tok/s (2x speed of 14B).
|
||||
# Use for routine tasks: simple tool calls, file reads, status checks.
|
||||
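# Combined memory with qwen3:14b: ~17 GB (presumably model weights only, excluding KV cache — TODO confirm against the ~17.5 GB figure quoted for qwen3:14b alone at 32K context) — both can stay loaded simultaneously.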
|
||||
ollama_fast_model: str = "qwen3:8b"
|
||||
|
||||
# Maximum concurrently loaded Ollama models — override with OLLAMA_MAX_LOADED_MODELS
|
||||
# Set to 2 to keep qwen3:8b (fast) + qwen3:14b (primary) both hot.
|
||||
# Requires setting OLLAMA_MAX_LOADED_MODELS=2 in the Ollama server environment.
|
||||
ollama_max_loaded_models: int = 2
|
||||
|
||||
# Context window size for Ollama inference — override with OLLAMA_NUM_CTX
|
||||
# qwen3:30b with default context eats 45GB on a 39GB Mac.
|
||||
# 4096 keeps memory at ~19GB. Set to 0 to use model defaults.
|
||||
ollama_num_ctx: int = 4096
|
||||
# qwen3:14b at 32K: ~17.5 GB total (weights + KV cache) on M3 Max 36 GB.
|
||||
# Set to 0 to use model defaults.
|
||||
ollama_num_ctx: int = 32768
|
||||
|
||||
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:30b,llama3.1"
|
||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3:8b,qwen2.5:14b"
|
||||
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
||||
fallback_models: list[str] = [
|
||||
"llama3.1:8b-instruct",
|
||||
"llama3.1",
|
||||
"qwen3:8b",
|
||||
"qwen2.5:14b",
|
||||
"qwen2.5:7b",
|
||||
"llama3.1:8b-instruct",
|
||||
"llama3.1",
|
||||
"llama3.2:3b",
|
||||
]
|
||||
vision_fallback_models: list[str] = [
|
||||
|
||||
Reference in New Issue
Block a user