forked from Rockachopa/Timmy-time-dashboard
feat: add OLLAMA_NUM_CTX config to cap context window (#83)
- Add ollama_num_ctx setting (default 4096) to config.py
- Pass num_ctx option to Ollama in agent.py and agents/base.py
- Add OLLAMA_NUM_CTX to .env.example with usage docs
- Add context_window note in providers.yaml
- Fix mock_settings in test_agent.py for new attribute
- qwen3:30b with 4096 ctx uses ~19GB vs 45GB default
This commit is contained in:
@@ -22,6 +22,11 @@ class Settings(BaseSettings):
|
||||
# llama3.2 (3B) hallucinated tool output consistently in testing.
|
||||
ollama_model: str = "qwen3.5:latest"
|
||||
|
||||
# Context window size for Ollama inference — override with OLLAMA_NUM_CTX
|
||||
# qwen3:30b with default context eats 45GB on a 39GB Mac.
|
||||
# 4096 keeps memory at ~19GB. Set to 0 to use model defaults.
|
||||
ollama_num_ctx: int = 4096
|
||||
|
||||
# Fallback model chains — override with FALLBACK_MODELS / VISION_FALLBACK_MODELS
|
||||
# as comma-separated strings, e.g. FALLBACK_MODELS="qwen3.5:latest,llama3.1"
|
||||
# Or edit config/providers.yaml → fallback_chains for the canonical source.
|
||||
|
||||
@@ -306,9 +306,12 @@ def create_timmy(
|
||||
logger.warning("Failed to load memory context: %s", exc)
|
||||
full_prompt = base_prompt
|
||||
|
||||
model_kwargs = {}
|
||||
if settings.ollama_num_ctx > 0:
|
||||
model_kwargs["options"] = {"num_ctx": settings.ollama_num_ctx}
|
||||
agent = Agent(
|
||||
name="Agent",
|
||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300, **model_kwargs),
|
||||
db=SqliteDb(db_file=db_file),
|
||||
description=full_prompt,
|
||||
add_history_to_context=True,
|
||||
|
||||
@@ -73,9 +73,12 @@ class BaseAgent(ABC):
|
||||
if handler:
|
||||
tool_instances.append(handler)
|
||||
|
||||
ollama_kwargs = {}
|
||||
if settings.ollama_num_ctx > 0:
|
||||
ollama_kwargs["options"] = {"num_ctx": settings.ollama_num_ctx}
|
||||
return Agent(
|
||||
name=self.name,
|
||||
model=Ollama(id=self.model, host=settings.ollama_url, timeout=300),
|
||||
model=Ollama(id=self.model, host=settings.ollama_url, timeout=300, **ollama_kwargs),
|
||||
description=system_prompt,
|
||||
tools=tool_instances if tool_instances else None,
|
||||
add_history_to_context=True,
|
||||
|
||||
Reference in New Issue
Block a user