@@ -1 +1 @@
-"""Timmy — Core AI agent (Ollama/AirLLM backends, CLI, prompts)."""
+"""Timmy — Core AI agent (Ollama/Grok/Claude backends, CLI, prompts)."""
@@ -26,12 +26,12 @@ from timmy.prompts import get_system_prompt
 from timmy.tools import create_full_toolkit
 
 if TYPE_CHECKING:
-    from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
+    from timmy.backends import ClaudeBackend, GrokBackend
 
 logger = logging.getLogger(__name__)
 
 # Union type for callers that want to hint the return type.
-TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
+TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend"]
 
 # Models known to be too small for reliable tool calling.
 # These hallucinate tool calls as text, invoke tools randomly,
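Callers that want a single annotation for whatever create_timmy() returns can lean on this union. A minimal sketch (the ask() helper is illustrative, not code from this repo):

```python
from __future__ import annotations


def ask(agent: "TimmyAgent", message: str) -> str:
    """Send one message and return the reply text.

    Any member of the TimmyAgent union works here, since every backend
    exposes run(message, stream) -> RunResult with a .content attribute.
    """
    result = agent.run(message, stream=False)
    return result.content
```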
@@ -172,29 +172,17 @@ def _warmup_model(model_name: str) -> bool:
 
 
 def _resolve_backend(requested: str | None) -> str:
-    """Return the backend name to use, resolving 'auto' and explicit overrides.
+    """Return the backend name to use.
 
-    Priority (highest → lowest):
+    Priority (highest -> lowest):
     1. CLI flag passed directly to create_timmy()
     2. TIMMY_MODEL_BACKEND env var / .env setting
-    3. 'ollama' (safe default — no surprises)
-
-    'auto' triggers Apple Silicon detection: uses AirLLM if both
-    is_apple_silicon() and airllm_available() return True.
+    3. 'ollama' (safe default -- no surprises)
     """
     if requested is not None:
         return requested
 
-    configured = settings.timmy_model_backend  # "ollama" | "airllm" | "grok" | "claude" | "auto"
-    if configured != "auto":
-        return configured
-
-    # "auto" path — lazy import to keep startup fast and tests clean.
-    from timmy.backends import airllm_available, is_apple_silicon
-
-    if is_apple_silicon() and airllm_available():
-        return "airllm"
-    return "ollama"
+    return settings.timmy_model_backend  # "ollama" | "grok" | "claude"
 
 
 def _build_tools_list(use_tools: bool, skip_mcp: bool, model_name: str) -> list:
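The replacement collapses backend resolution to two steps: an explicit argument wins, otherwise the TIMMY_MODEL_BACKEND setting is returned as-is. A standalone sketch of that priority order (illustrative re-implementation, not the repo function, which reads the real settings object):

```python
def resolve_backend(requested: str | None, configured: str = "ollama") -> str:
    """Mirror the new priority order: explicit argument first, then configuration."""
    if requested is not None:
        return requested   # 1. CLI flag / direct argument wins
    return configured      # 2. TIMMY_MODEL_BACKEND setting; defaults to "ollama"


# Explicit flag beats configuration; configuration beats the default.
assert resolve_backend("grok", configured="claude") == "grok"
assert resolve_backend(None, configured="claude") == "claude"
assert resolve_backend(None) == "ollama"
```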
@@ -284,17 +272,15 @@ def _create_ollama_agent(
 def create_timmy(
     db_file: str = "timmy.db",
     backend: str | None = None,
-    model_size: str | None = None,
     *,
     skip_mcp: bool = False,
     session_id: str = "unknown",
 ) -> TimmyAgent:
-    """Instantiate the agent — Ollama or AirLLM, same public interface.
+    """Instantiate the agent — Ollama, Grok, or Claude.
 
     Args:
         db_file: SQLite file for Agno conversation memory (Ollama path only).
-        backend: "ollama" | "airllm" | "auto" | None (reads config/env).
-        model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).
+        backend: "ollama" | "grok" | "claude" | None (reads config/env).
         skip_mcp: If True, omit MCP tool servers (Gitea, filesystem).
             Use for background tasks (thinking, QA) where MCP's
             stdio cancel-scope lifecycle conflicts with asyncio
@@ -304,7 +290,6 @@ def create_timmy(
         print_response(message, stream).
     """
     resolved = _resolve_backend(backend)
-    size = model_size or "70b"
 
     if resolved == "claude":
         from timmy.backends import ClaudeBackend
@@ -316,11 +301,6 @@ def create_timmy(
 
         return GrokBackend()
 
-    if resolved == "airllm":
-        from timmy.backends import TimmyAirLLMAgent
-
-        return TimmyAirLLMAgent(model_size=size)
-
     # Default: Ollama via Agno.
     model_name, is_fallback = _resolve_model_with_fallback(
         requested_model=None,
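With the airllm branch removed, create_timmy() dispatches on exactly three values. A usage sketch, assuming the module path timmy.agent and working credentials or a running Ollama daemon:

```python
from timmy.agent import create_timmy  # module path assumed from this diff

# Cloud backend for a background task: skip the MCP tool servers entirely.
agent = create_timmy(backend="claude", skip_mcp=True, session_id="qa-run")

# The call surface is the same regardless of backend.
result = agent.run("Summarise the open issues.", stream=False)
print(result.content)

# Default path: backend=None reads TIMMY_MODEL_BACKEND and falls back to Ollama.
local_agent = create_timmy()
local_agent.print_response("Hello Timmy", stream=True)
```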
@@ -1,11 +1,10 @@
-"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
+"""LLM backends — Grok (xAI) and Claude (Anthropic).
 
 Provides drop-in replacements for the Agno Agent that expose the same
 run(message, stream) → RunResult interface used by the dashboard and the
 print_response(message, stream) interface used by the CLI.
 
 Backends:
-    - TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
     - GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
     - ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
 
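Stated as a structural type, the contract the remaining backends honour looks roughly like the sketch below; the Protocol itself is illustrative and is not defined in the repo:

```python
from typing import Protocol

from timmy.backends import RunResult  # dataclass defined in this module


class BackendProtocol(Protocol):
    """Illustrative structural interface shared by GrokBackend and ClaudeBackend."""

    def run(self, message: str, *, stream: bool = False) -> RunResult:
        """Return the reply as a structured result (dashboard path)."""
        ...

    def print_response(self, message: str, *, stream: bool = True) -> None:
        """Render the reply to stdout (CLI path)."""
        ...
```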
@@ -16,21 +15,11 @@ import logging
 import platform
 import time
 from dataclasses import dataclass
-from typing import Literal
 
 from timmy.prompts import get_system_prompt
 
 logger = logging.getLogger(__name__)
 
-# HuggingFace model IDs for each supported size.
-_AIRLLM_MODELS: dict[str, str] = {
-    "8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
-    "70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
-    "405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
-}
-
-ModelSize = Literal["8b", "70b", "405b"]
-
 
 @dataclass
 class RunResult:
@@ -45,108 +34,6 @@ def is_apple_silicon() -> bool:
     return platform.system() == "Darwin" and platform.machine() == "arm64"
 
 
-def airllm_available() -> bool:
-    """Return True when the airllm package is importable."""
-    try:
-        import airllm  # noqa: F401
-
-        return True
-    except ImportError:
-        return False
-
-
-class TimmyAirLLMAgent:
-    """Thin AirLLM wrapper compatible with both dashboard and CLI call sites.
-
-    Exposes:
-        run(message, stream) → RunResult(content=...)   [dashboard]
-        print_response(message, stream) → None           [CLI]
-
-    Maintains a rolling 10-turn in-memory history so Timmy remembers the
-    conversation within a session — no SQLite needed at this layer.
-    """
-
-    def __init__(self, model_size: str = "70b") -> None:
-        model_id = _AIRLLM_MODELS.get(model_size)
-        if model_id is None:
-            raise ValueError(
-                f"Unknown model size {model_size!r}. Choose from: {list(_AIRLLM_MODELS)}"
-            )
-
-        if is_apple_silicon():
-            from airllm import AirLLMMLX  # type: ignore[import]
-
-            self._model = AirLLMMLX(model_id)
-        else:
-            from airllm import AutoModel  # type: ignore[import]
-
-            self._model = AutoModel.from_pretrained(model_id)
-
-        self._history: list[str] = []
-        self._model_size = model_size
-
-    # ── public interface (mirrors Agno Agent) ────────────────────────────────
-
-    def run(self, message: str, *, stream: bool = False) -> RunResult:
-        """Run inference and return a structured result (matches Agno Agent.run()).
-
-        `stream` is accepted for API compatibility; AirLLM always generates
-        the full output in one pass.
-        """
-        prompt = self._build_prompt(message)
-
-        input_tokens = self._model.tokenizer(
-            [prompt],
-            return_tensors="pt",
-            padding=True,
-            truncation=True,
-            max_length=2048,
-        )
-        output = self._model.generate(
-            **input_tokens,
-            max_new_tokens=512,
-            use_cache=True,
-            do_sample=True,
-            temperature=0.7,
-        )
-
-        # Decode only the newly generated tokens, not the prompt.
-        input_len = input_tokens["input_ids"].shape[1]
-        response = self._model.tokenizer.decode(
-            output[0][input_len:], skip_special_tokens=True
-        ).strip()
-
-        self._history.append(f"User: {message}")
-        self._history.append(f"Timmy: {response}")
-
-        return RunResult(content=response)
-
-    def print_response(self, message: str, *, stream: bool = True) -> None:
-        """Run inference and render the response to stdout (CLI interface)."""
-        result = self.run(message, stream=stream)
-        self._render(result.content)
-
-    # ── private helpers ──────────────────────────────────────────────────────
-
-    def _build_prompt(self, message: str) -> str:
-        context = get_system_prompt(tools_enabled=False, session_id="airllm") + "\n\n"
-        # Include the last 10 turns (5 exchanges) for continuity.
-        if self._history:
-            context += "\n".join(self._history[-10:]) + "\n\n"
-        return context + f"User: {message}\nTimmy:"
-
-    @staticmethod
-    def _render(text: str) -> None:
-        """Print response with rich markdown when available, plain text otherwise."""
-        try:
-            from rich.console import Console
-            from rich.markdown import Markdown
-
-            Console().print(Markdown(text))
-        except ImportError:
-            print(text)
-
-
 # ── Grok (xAI) Backend ─────────────────────────────────────────────────────
 # Premium cloud augmentation — opt-in only, never the default path.
 
@@ -187,7 +74,7 @@ class GrokBackend:
     Uses the OpenAI-compatible SDK to connect to xAI's API.
     Only activated when GROK_ENABLED=true and XAI_API_KEY is set.
 
-    Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
+    Exposes the same interface as Agno Agent:
         run(message, stream) → RunResult         [dashboard]
         print_response(message, stream) → None   [CLI]
         health_check() → dict                    [monitoring]
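health_check() is the monitoring hook; this diff does not show which keys the returned dict carries, so the sketch below just reports whatever comes back. Assumes GROK_ENABLED=true and XAI_API_KEY are set, as the docstring requires:

```python
import logging

from timmy.backends import GrokBackend  # import path taken from this diff

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("monitoring")

backend = GrokBackend()

# health_check() returns a dict; its keys are backend-specific, so log it verbatim.
status = backend.health_check()
logger.info("grok backend health: %s", status)
```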
@@ -437,8 +324,7 @@ CLAUDE_MODELS: dict[str, str] = {
 class ClaudeBackend:
     """Anthropic Claude backend — cloud fallback when local models are offline.
 
-    Uses the official Anthropic SDK. Same interface as GrokBackend and
-    TimmyAirLLMAgent:
+    Uses the official Anthropic SDK. Same interface as GrokBackend:
        run(message, stream) → RunResult         [dashboard]
        print_response(message, stream) → None   [CLI]
        health_check() → dict                    [monitoring]
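The docstring positions Claude as the fallback for when local models are offline. One way a caller might act on that, sketched here rather than taken from the repo (the no-argument ClaudeBackend() construction is an assumption):

```python
from timmy.agent import create_timmy      # module path assumed from this diff
from timmy.backends import ClaudeBackend


def agent_with_fallback():
    """Prefer the local Ollama agent; fall back to Claude if it cannot start."""
    try:
        return create_timmy(backend="ollama")
    except Exception:           # e.g. the Ollama daemon is not running
        return ClaudeBackend()  # no-arg construction assumed; needs Anthropic credentials


agent = agent_with_fallback()
agent.print_response("Are you running locally or in the cloud?", stream=False)
```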
@@ -22,13 +22,13 @@ _BACKEND_OPTION = typer.Option(
     None,
     "--backend",
     "-b",
-    help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'",
+    help="Inference backend: 'ollama' (default) | 'grok' | 'claude'",
 )
 _MODEL_SIZE_OPTION = typer.Option(
     None,
     "--model-size",
     "-s",
-    help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'",
+    help="Model size (reserved for future use).",
 )
 
 
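These typer options only declare the flags; the command that consumes them is outside this hunk. A hypothetical command showing how --backend would flow into create_timmy() (the chat command and module path are assumptions):

```python
from typing import Optional

import typer

from timmy.agent import create_timmy  # module path assumed from this diff

app = typer.Typer()


@app.command()
def chat(
    backend: Optional[str] = typer.Option(
        None,
        "--backend",
        "-b",
        help="Inference backend: 'ollama' (default) | 'grok' | 'claude'",
    ),
) -> None:
    """Hypothetical command: forward the flag straight to create_timmy()."""
    agent = create_timmy(backend=backend)
    agent.print_response("Hi Timmy", stream=True)


if __name__ == "__main__":
    app()
```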
@@ -26,7 +26,7 @@ def get_system_info() -> dict[str, Any]:
     - python_version: Python version
     - platform: OS platform
     - model: Current Ollama model (queried from API)
-    - model_backend: Configured backend (ollama/airllm/grok)
+    - model_backend: Configured backend (ollama/grok/claude)
     - ollama_url: Ollama host URL
     - repo_root: Repository root path
     - grok_enabled: Whether GROK is enabled
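For reference, the documented keys translate to a payload roughly like the following; the values are placeholders, not captured from a real run:

```python
# Illustrative shape of get_system_info()'s return value; every value is made up.
example_system_info = {
    "python_version": "3.12.4",
    "platform": "Linux",
    "model": "llama3.1:8b",            # current Ollama model, queried from the API
    "model_backend": "ollama",         # one of: ollama / grok / claude
    "ollama_url": "http://localhost:11434",
    "repo_root": "/path/to/repo",
    "grok_enabled": False,
}
```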