forked from Rockachopa/Timmy-time-dashboard
@@ -1 +1 @@
|
|||||||
"""Timmy — Core AI agent (Ollama/AirLLM backends, CLI, prompts)."""
|
"""Timmy — Core AI agent (Ollama/Grok/Claude backends, CLI, prompts)."""
|
||||||
|
|||||||
@@ -26,12 +26,12 @@ from timmy.prompts import get_system_prompt
|
|||||||
from timmy.tools import create_full_toolkit
|
from timmy.tools import create_full_toolkit
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from timmy.backends import ClaudeBackend, GrokBackend, TimmyAirLLMAgent
|
from timmy.backends import ClaudeBackend, GrokBackend
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Union type for callers that want to hint the return type.
|
# Union type for callers that want to hint the return type.
|
||||||
TimmyAgent = Union[Agent, "TimmyAirLLMAgent", "GrokBackend", "ClaudeBackend"]
|
TimmyAgent = Union[Agent, "GrokBackend", "ClaudeBackend"]
|
||||||
|
|
||||||
# Models known to be too small for reliable tool calling.
|
# Models known to be too small for reliable tool calling.
|
||||||
# These hallucinate tool calls as text, invoke tools randomly,
|
# These hallucinate tool calls as text, invoke tools randomly,
|
||||||
@@ -172,29 +172,17 @@ def _warmup_model(model_name: str) -> bool:
|
|||||||
|
|
||||||
|
|
||||||
def _resolve_backend(requested: str | None) -> str:
|
def _resolve_backend(requested: str | None) -> str:
|
||||||
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
"""Return the backend name to use.
|
||||||
|
|
||||||
Priority (highest → lowest):
|
Priority (highest -> lowest):
|
||||||
1. CLI flag passed directly to create_timmy()
|
1. CLI flag passed directly to create_timmy()
|
||||||
2. TIMMY_MODEL_BACKEND env var / .env setting
|
2. TIMMY_MODEL_BACKEND env var / .env setting
|
||||||
3. 'ollama' (safe default — no surprises)
|
3. 'ollama' (safe default -- no surprises)
|
||||||
|
|
||||||
'auto' triggers Apple Silicon detection: uses AirLLM if both
|
|
||||||
is_apple_silicon() and airllm_available() return True.
|
|
||||||
"""
|
"""
|
||||||
if requested is not None:
|
if requested is not None:
|
||||||
return requested
|
return requested
|
||||||
|
|
||||||
configured = settings.timmy_model_backend # "ollama" | "airllm" | "grok" | "claude" | "auto"
|
return settings.timmy_model_backend # "ollama" | "grok" | "claude"
|
||||||
if configured != "auto":
|
|
||||||
return configured
|
|
||||||
|
|
||||||
# "auto" path — lazy import to keep startup fast and tests clean.
|
|
||||||
from timmy.backends import airllm_available, is_apple_silicon
|
|
||||||
|
|
||||||
if is_apple_silicon() and airllm_available():
|
|
||||||
return "airllm"
|
|
||||||
return "ollama"
|
|
||||||
|
|
||||||
|
|
||||||
def _build_tools_list(use_tools: bool, skip_mcp: bool, model_name: str) -> list:
|
def _build_tools_list(use_tools: bool, skip_mcp: bool, model_name: str) -> list:
|
||||||
@@ -284,17 +272,15 @@ def _create_ollama_agent(
|
|||||||
def create_timmy(
|
def create_timmy(
|
||||||
db_file: str = "timmy.db",
|
db_file: str = "timmy.db",
|
||||||
backend: str | None = None,
|
backend: str | None = None,
|
||||||
model_size: str | None = None,
|
|
||||||
*,
|
*,
|
||||||
skip_mcp: bool = False,
|
skip_mcp: bool = False,
|
||||||
session_id: str = "unknown",
|
session_id: str = "unknown",
|
||||||
) -> TimmyAgent:
|
) -> TimmyAgent:
|
||||||
"""Instantiate the agent — Ollama or AirLLM, same public interface.
|
"""Instantiate the agent — Ollama, Grok, or Claude.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_file: SQLite file for Agno conversation memory (Ollama path only).
|
db_file: SQLite file for Agno conversation memory (Ollama path only).
|
||||||
backend: "ollama" | "airllm" | "auto" | None (reads config/env).
|
backend: "ollama" | "grok" | "claude" | None (reads config/env).
|
||||||
model_size: AirLLM size — "8b" | "70b" | "405b" | None (reads config).
|
|
||||||
skip_mcp: If True, omit MCP tool servers (Gitea, filesystem).
|
skip_mcp: If True, omit MCP tool servers (Gitea, filesystem).
|
||||||
Use for background tasks (thinking, QA) where MCP's
|
Use for background tasks (thinking, QA) where MCP's
|
||||||
stdio cancel-scope lifecycle conflicts with asyncio
|
stdio cancel-scope lifecycle conflicts with asyncio
|
||||||
@@ -304,7 +290,6 @@ def create_timmy(
|
|||||||
print_response(message, stream).
|
print_response(message, stream).
|
||||||
"""
|
"""
|
||||||
resolved = _resolve_backend(backend)
|
resolved = _resolve_backend(backend)
|
||||||
size = model_size or "70b"
|
|
||||||
|
|
||||||
if resolved == "claude":
|
if resolved == "claude":
|
||||||
from timmy.backends import ClaudeBackend
|
from timmy.backends import ClaudeBackend
|
||||||
@@ -316,11 +301,6 @@ def create_timmy(
|
|||||||
|
|
||||||
return GrokBackend()
|
return GrokBackend()
|
||||||
|
|
||||||
if resolved == "airllm":
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
return TimmyAirLLMAgent(model_size=size)
|
|
||||||
|
|
||||||
# Default: Ollama via Agno.
|
# Default: Ollama via Agno.
|
||||||
model_name, is_fallback = _resolve_model_with_fallback(
|
model_name, is_fallback = _resolve_model_with_fallback(
|
||||||
requested_model=None,
|
requested_model=None,
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
"""LLM backends — AirLLM (local big models), Grok (xAI), and Claude (Anthropic).
|
"""LLM backends — Grok (xAI) and Claude (Anthropic).
|
||||||
|
|
||||||
Provides drop-in replacements for the Agno Agent that expose the same
|
Provides drop-in replacements for the Agno Agent that expose the same
|
||||||
run(message, stream) → RunResult interface used by the dashboard and the
|
run(message, stream) → RunResult interface used by the dashboard and the
|
||||||
print_response(message, stream) interface used by the CLI.
|
print_response(message, stream) interface used by the CLI.
|
||||||
|
|
||||||
Backends:
|
Backends:
|
||||||
- TimmyAirLLMAgent: Local 8B/70B/405B via AirLLM (Apple Silicon or PyTorch)
|
|
||||||
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
|
- GrokBackend: xAI Grok API via OpenAI-compatible SDK (opt-in premium)
|
||||||
- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
|
- ClaudeBackend: Anthropic Claude API — lightweight cloud fallback
|
||||||
|
|
||||||
@@ -16,21 +15,11 @@ import logging
|
|||||||
import platform
|
import platform
|
||||||
import time
|
import time
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Literal
|
|
||||||
|
|
||||||
from timmy.prompts import get_system_prompt
|
from timmy.prompts import get_system_prompt
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# HuggingFace model IDs for each supported size.
|
|
||||||
_AIRLLM_MODELS: dict[str, str] = {
|
|
||||||
"8b": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
|
||||||
"70b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
|
||||||
"405b": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
|
||||||
}
|
|
||||||
|
|
||||||
ModelSize = Literal["8b", "70b", "405b"]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class RunResult:
|
class RunResult:
|
||||||
@@ -45,108 +34,6 @@ def is_apple_silicon() -> bool:
|
|||||||
return platform.system() == "Darwin" and platform.machine() == "arm64"
|
return platform.system() == "Darwin" and platform.machine() == "arm64"
|
||||||
|
|
||||||
|
|
||||||
def airllm_available() -> bool:
|
|
||||||
"""Return True when the airllm package is importable."""
|
|
||||||
try:
|
|
||||||
import airllm # noqa: F401
|
|
||||||
|
|
||||||
return True
|
|
||||||
except ImportError:
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
class TimmyAirLLMAgent:
|
|
||||||
"""Thin AirLLM wrapper compatible with both dashboard and CLI call sites.
|
|
||||||
|
|
||||||
Exposes:
|
|
||||||
run(message, stream) → RunResult(content=...) [dashboard]
|
|
||||||
print_response(message, stream) → None [CLI]
|
|
||||||
|
|
||||||
Maintains a rolling 10-turn in-memory history so Timmy remembers the
|
|
||||||
conversation within a session — no SQLite needed at this layer.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, model_size: str = "70b") -> None:
|
|
||||||
model_id = _AIRLLM_MODELS.get(model_size)
|
|
||||||
if model_id is None:
|
|
||||||
raise ValueError(
|
|
||||||
f"Unknown model size {model_size!r}. Choose from: {list(_AIRLLM_MODELS)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_apple_silicon():
|
|
||||||
from airllm import AirLLMMLX # type: ignore[import]
|
|
||||||
|
|
||||||
self._model = AirLLMMLX(model_id)
|
|
||||||
else:
|
|
||||||
from airllm import AutoModel # type: ignore[import]
|
|
||||||
|
|
||||||
self._model = AutoModel.from_pretrained(model_id)
|
|
||||||
|
|
||||||
self._history: list[str] = []
|
|
||||||
self._model_size = model_size
|
|
||||||
|
|
||||||
# ── public interface (mirrors Agno Agent) ────────────────────────────────
|
|
||||||
|
|
||||||
def run(self, message: str, *, stream: bool = False) -> RunResult:
|
|
||||||
"""Run inference and return a structured result (matches Agno Agent.run()).
|
|
||||||
|
|
||||||
`stream` is accepted for API compatibility; AirLLM always generates
|
|
||||||
the full output in one pass.
|
|
||||||
"""
|
|
||||||
prompt = self._build_prompt(message)
|
|
||||||
|
|
||||||
input_tokens = self._model.tokenizer(
|
|
||||||
[prompt],
|
|
||||||
return_tensors="pt",
|
|
||||||
padding=True,
|
|
||||||
truncation=True,
|
|
||||||
max_length=2048,
|
|
||||||
)
|
|
||||||
output = self._model.generate(
|
|
||||||
**input_tokens,
|
|
||||||
max_new_tokens=512,
|
|
||||||
use_cache=True,
|
|
||||||
do_sample=True,
|
|
||||||
temperature=0.7,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Decode only the newly generated tokens, not the prompt.
|
|
||||||
input_len = input_tokens["input_ids"].shape[1]
|
|
||||||
response = self._model.tokenizer.decode(
|
|
||||||
output[0][input_len:], skip_special_tokens=True
|
|
||||||
).strip()
|
|
||||||
|
|
||||||
self._history.append(f"User: {message}")
|
|
||||||
self._history.append(f"Timmy: {response}")
|
|
||||||
|
|
||||||
return RunResult(content=response)
|
|
||||||
|
|
||||||
def print_response(self, message: str, *, stream: bool = True) -> None:
|
|
||||||
"""Run inference and render the response to stdout (CLI interface)."""
|
|
||||||
result = self.run(message, stream=stream)
|
|
||||||
self._render(result.content)
|
|
||||||
|
|
||||||
# ── private helpers ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def _build_prompt(self, message: str) -> str:
|
|
||||||
context = get_system_prompt(tools_enabled=False, session_id="airllm") + "\n\n"
|
|
||||||
# Include the last 10 turns (5 exchanges) for continuity.
|
|
||||||
if self._history:
|
|
||||||
context += "\n".join(self._history[-10:]) + "\n\n"
|
|
||||||
return context + f"User: {message}\nTimmy:"
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _render(text: str) -> None:
|
|
||||||
"""Print response with rich markdown when available, plain text otherwise."""
|
|
||||||
try:
|
|
||||||
from rich.console import Console
|
|
||||||
from rich.markdown import Markdown
|
|
||||||
|
|
||||||
Console().print(Markdown(text))
|
|
||||||
except ImportError:
|
|
||||||
print(text)
|
|
||||||
|
|
||||||
|
|
||||||
# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
|
# ── Grok (xAI) Backend ─────────────────────────────────────────────────────
|
||||||
# Premium cloud augmentation — opt-in only, never the default path.
|
# Premium cloud augmentation — opt-in only, never the default path.
|
||||||
|
|
||||||
@@ -187,7 +74,7 @@ class GrokBackend:
|
|||||||
Uses the OpenAI-compatible SDK to connect to xAI's API.
|
Uses the OpenAI-compatible SDK to connect to xAI's API.
|
||||||
Only activated when GROK_ENABLED=true and XAI_API_KEY is set.
|
Only activated when GROK_ENABLED=true and XAI_API_KEY is set.
|
||||||
|
|
||||||
Exposes the same interface as TimmyAirLLMAgent and Agno Agent:
|
Exposes the same interface as Agno Agent:
|
||||||
run(message, stream) → RunResult [dashboard]
|
run(message, stream) → RunResult [dashboard]
|
||||||
print_response(message, stream) → None [CLI]
|
print_response(message, stream) → None [CLI]
|
||||||
health_check() → dict [monitoring]
|
health_check() → dict [monitoring]
|
||||||
@@ -437,8 +324,7 @@ CLAUDE_MODELS: dict[str, str] = {
|
|||||||
class ClaudeBackend:
|
class ClaudeBackend:
|
||||||
"""Anthropic Claude backend — cloud fallback when local models are offline.
|
"""Anthropic Claude backend — cloud fallback when local models are offline.
|
||||||
|
|
||||||
Uses the official Anthropic SDK. Same interface as GrokBackend and
|
Uses the official Anthropic SDK. Same interface as GrokBackend:
|
||||||
TimmyAirLLMAgent:
|
|
||||||
run(message, stream) → RunResult [dashboard]
|
run(message, stream) → RunResult [dashboard]
|
||||||
print_response(message, stream) → None [CLI]
|
print_response(message, stream) → None [CLI]
|
||||||
health_check() → dict [monitoring]
|
health_check() → dict [monitoring]
|
||||||
|
|||||||
@@ -22,13 +22,13 @@ _BACKEND_OPTION = typer.Option(
|
|||||||
None,
|
None,
|
||||||
"--backend",
|
"--backend",
|
||||||
"-b",
|
"-b",
|
||||||
help="Inference backend: 'ollama' (default) | 'airllm' | 'auto'",
|
help="Inference backend: 'ollama' (default) | 'grok' | 'claude'",
|
||||||
)
|
)
|
||||||
_MODEL_SIZE_OPTION = typer.Option(
|
_MODEL_SIZE_OPTION = typer.Option(
|
||||||
None,
|
None,
|
||||||
"--model-size",
|
"--model-size",
|
||||||
"-s",
|
"-s",
|
||||||
help="AirLLM model size when --backend airllm: '8b' | '70b' | '405b'",
|
help="Model size (reserved for future use).",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ def get_system_info() -> dict[str, Any]:
|
|||||||
- python_version: Python version
|
- python_version: Python version
|
||||||
- platform: OS platform
|
- platform: OS platform
|
||||||
- model: Current Ollama model (queried from API)
|
- model: Current Ollama model (queried from API)
|
||||||
- model_backend: Configured backend (ollama/airllm/grok)
|
- model_backend: Configured backend (ollama/grok/claude)
|
||||||
- ollama_url: Ollama host URL
|
- ollama_url: Ollama host URL
|
||||||
- repo_root: Repository root path
|
- repo_root: Repository root path
|
||||||
- grok_enabled: Whether GROK is enabled
|
- grok_enabled: Whether GROK is enabled
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ except ImportError:
|
|||||||
# agno is a core dependency (always installed) — do NOT stub it, or its
|
# agno is a core dependency (always installed) — do NOT stub it, or its
|
||||||
# internal import chains break under xdist parallel workers.
|
# internal import chains break under xdist parallel workers.
|
||||||
for _mod in [
|
for _mod in [
|
||||||
"airllm",
|
|
||||||
"mcp",
|
"mcp",
|
||||||
"mcp.client",
|
"mcp.client",
|
||||||
"mcp.client.stdio",
|
"mcp.client.stdio",
|
||||||
|
|||||||
@@ -10,12 +10,10 @@ Categories:
|
|||||||
M3xx iOS keyboard & zoom prevention
|
M3xx iOS keyboard & zoom prevention
|
||||||
M4xx HTMX robustness (double-submit, sync)
|
M4xx HTMX robustness (double-submit, sync)
|
||||||
M5xx Safe-area / notch support
|
M5xx Safe-area / notch support
|
||||||
M6xx AirLLM backend interface contract
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
|
||||||
|
|
||||||
# ── helpers ───────────────────────────────────────────────────────────────────
|
# ── helpers ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -206,147 +204,3 @@ def test_M505_dvh_units_used():
|
|||||||
"""Dynamic viewport height (dvh) accounts for collapsing browser chrome."""
|
"""Dynamic viewport height (dvh) accounts for collapsing browser chrome."""
|
||||||
css = _css()
|
css = _css()
|
||||||
assert "dvh" in css
|
assert "dvh" in css
|
||||||
|
|
||||||
|
|
||||||
# ── M6xx — AirLLM backend interface contract ──────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_M601_airllm_agent_has_run_method():
|
|
||||||
"""TimmyAirLLMAgent must expose run() so the dashboard route can call it."""
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
assert hasattr(TimmyAirLLMAgent, "run"), (
|
|
||||||
"TimmyAirLLMAgent is missing run() — dashboard will fail with AirLLM backend"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M602_airllm_run_returns_content_attribute():
|
|
||||||
"""run() must return an object with a .content attribute (Agno RunResponse compat)."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
agent = TimmyAirLLMAgent(model_size="8b")
|
|
||||||
|
|
||||||
mock_model = MagicMock()
|
|
||||||
mock_tokenizer = MagicMock()
|
|
||||||
input_ids_mock = MagicMock()
|
|
||||||
input_ids_mock.shape = [1, 5]
|
|
||||||
mock_tokenizer.return_value = {"input_ids": input_ids_mock}
|
|
||||||
mock_tokenizer.decode.return_value = "Sir, affirmative."
|
|
||||||
mock_model.tokenizer = mock_tokenizer
|
|
||||||
mock_model.generate.return_value = [list(range(10))]
|
|
||||||
agent._model = mock_model
|
|
||||||
|
|
||||||
result = agent.run("test")
|
|
||||||
assert hasattr(result, "content"), "run() result must have a .content attribute"
|
|
||||||
assert isinstance(result.content, str)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M603_airllm_run_updates_history():
|
|
||||||
"""run() must update _history so multi-turn context is preserved."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
agent = TimmyAirLLMAgent(model_size="8b")
|
|
||||||
|
|
||||||
mock_model = MagicMock()
|
|
||||||
mock_tokenizer = MagicMock()
|
|
||||||
input_ids_mock = MagicMock()
|
|
||||||
input_ids_mock.shape = [1, 5]
|
|
||||||
mock_tokenizer.return_value = {"input_ids": input_ids_mock}
|
|
||||||
mock_tokenizer.decode.return_value = "Acknowledged."
|
|
||||||
mock_model.tokenizer = mock_tokenizer
|
|
||||||
mock_model.generate.return_value = [list(range(10))]
|
|
||||||
agent._model = mock_model
|
|
||||||
|
|
||||||
assert len(agent._history) == 0
|
|
||||||
agent.run("hello")
|
|
||||||
assert len(agent._history) == 2
|
|
||||||
assert any("hello" in h for h in agent._history)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M604_airllm_print_response_delegates_to_run():
|
|
||||||
"""print_response must use run() so both interfaces share one inference path."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import RunResult, TimmyAirLLMAgent
|
|
||||||
|
|
||||||
agent = TimmyAirLLMAgent(model_size="8b")
|
|
||||||
|
|
||||||
with (
|
|
||||||
patch.object(agent, "run", return_value=RunResult(content="ok")) as mock_run,
|
|
||||||
patch.object(agent, "_render"),
|
|
||||||
):
|
|
||||||
agent.print_response("hello", stream=True)
|
|
||||||
|
|
||||||
mock_run.assert_called_once_with("hello", stream=True)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M605_health_status_passes_model_to_template(client):
|
|
||||||
"""Health status partial must receive the configured model name, not a hardcoded string."""
|
|
||||||
from config import settings
|
|
||||||
|
|
||||||
with patch(
|
|
||||||
"dashboard.routes.health.check_ollama",
|
|
||||||
new_callable=AsyncMock,
|
|
||||||
return_value=True,
|
|
||||||
):
|
|
||||||
response = client.get("/health/status")
|
|
||||||
# Model name should come from settings, not be hardcoded
|
|
||||||
assert response.status_code == 200
|
|
||||||
model_short = settings.ollama_model.split(":")[0]
|
|
||||||
assert model_short in response.text
|
|
||||||
|
|
||||||
|
|
||||||
# ── M7xx — XSS prevention ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _mobile_html() -> str:
|
|
||||||
"""Read the mobile template source."""
|
|
||||||
path = Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "mobile.html"
|
|
||||||
return path.read_text()
|
|
||||||
|
|
||||||
|
|
||||||
def _swarm_live_html() -> str:
|
|
||||||
"""Read the swarm live template source."""
|
|
||||||
path = (
|
|
||||||
Path(__file__).parent.parent.parent / "src" / "dashboard" / "templates" / "swarm_live.html"
|
|
||||||
)
|
|
||||||
return path.read_text()
|
|
||||||
|
|
||||||
|
|
||||||
def test_M701_mobile_chat_no_raw_message_interpolation():
|
|
||||||
"""mobile.html must not interpolate ${message} directly into innerHTML — XSS risk."""
|
|
||||||
html = _mobile_html()
|
|
||||||
# The vulnerable pattern is `${message}` inside a template literal assigned to innerHTML
|
|
||||||
# After the fix, message must only appear via textContent assignment
|
|
||||||
assert "textContent = message" in html or "textContent=message" in html, (
|
|
||||||
"mobile.html still uses innerHTML + ${message} interpolation — XSS vulnerability"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M702_mobile_chat_user_input_not_in_innerhtml_template_literal():
|
|
||||||
"""${message} must not appear inside a backtick string that is assigned to innerHTML."""
|
|
||||||
html = _mobile_html()
|
|
||||||
# Find all innerHTML += `...` blocks and verify none contain ${message}
|
|
||||||
blocks = re.findall(r"innerHTML\s*\+=?\s*`([^`]*)`", html, re.DOTALL)
|
|
||||||
for block in blocks:
|
|
||||||
assert "${message}" not in block, (
|
|
||||||
"innerHTML template literal still contains ${message} — XSS vulnerability"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M703_swarm_live_agent_name_not_interpolated_in_innerhtml():
|
|
||||||
"""swarm_live.html must not put ${agent.name} inside innerHTML template literals."""
|
|
||||||
html = _swarm_live_html()
|
|
||||||
blocks = re.findall(r"innerHTML\s*=\s*agents\.map\([^;]+\)\.join\([^)]*\)", html, re.DOTALL)
|
|
||||||
assert len(blocks) == 0, (
|
|
||||||
"swarm_live.html still uses innerHTML=agents.map(…) with interpolated agent data — XSS vulnerability"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_M704_swarm_live_uses_textcontent_for_agent_data():
|
|
||||||
"""swarm_live.html must use textContent (not innerHTML) to set agent name/description."""
|
|
||||||
html = _swarm_live_html()
|
|
||||||
assert "textContent" in html, (
|
|
||||||
"swarm_live.html does not use textContent — agent data may be raw-interpolated into DOM"
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -81,7 +81,6 @@ def test_create_timmy_respects_custom_ollama_url():
|
|||||||
mock_settings.ollama_url = custom_url
|
mock_settings.ollama_url = custom_url
|
||||||
mock_settings.ollama_num_ctx = 4096
|
mock_settings.ollama_num_ctx = 4096
|
||||||
mock_settings.timmy_model_backend = "ollama"
|
mock_settings.timmy_model_backend = "ollama"
|
||||||
mock_settings.airllm_model_size = "70b"
|
|
||||||
|
|
||||||
from timmy.agent import create_timmy
|
from timmy.agent import create_timmy
|
||||||
|
|
||||||
@@ -91,33 +90,6 @@ def test_create_timmy_respects_custom_ollama_url():
|
|||||||
assert kwargs["host"] == custom_url
|
assert kwargs["host"] == custom_url
|
||||||
|
|
||||||
|
|
||||||
# ── AirLLM path ──────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_create_timmy_airllm_returns_airllm_agent():
|
|
||||||
"""backend='airllm' must return a TimmyAirLLMAgent, not an Agno Agent."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.agent import create_timmy
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
result = create_timmy(backend="airllm", model_size="8b")
|
|
||||||
|
|
||||||
assert isinstance(result, TimmyAirLLMAgent)
|
|
||||||
|
|
||||||
|
|
||||||
def test_create_timmy_airllm_does_not_call_agno_agent():
|
|
||||||
"""When using the airllm backend, Agno Agent should never be instantiated."""
|
|
||||||
with (
|
|
||||||
patch("timmy.agent.Agent") as MockAgent,
|
|
||||||
patch("timmy.backends.is_apple_silicon", return_value=False),
|
|
||||||
):
|
|
||||||
from timmy.agent import create_timmy
|
|
||||||
|
|
||||||
create_timmy(backend="airllm", model_size="8b")
|
|
||||||
|
|
||||||
MockAgent.assert_not_called()
|
|
||||||
|
|
||||||
|
|
||||||
def test_create_timmy_explicit_ollama_ignores_autodetect():
|
def test_create_timmy_explicit_ollama_ignores_autodetect():
|
||||||
"""backend='ollama' must always use Ollama, even on Apple Silicon."""
|
"""backend='ollama' must always use Ollama, even on Apple Silicon."""
|
||||||
with (
|
with (
|
||||||
@@ -141,7 +113,6 @@ def test_create_timmy_explicit_ollama_ignores_autodetect():
|
|||||||
def test_resolve_backend_explicit_takes_priority():
|
def test_resolve_backend_explicit_takes_priority():
|
||||||
from timmy.agent import _resolve_backend
|
from timmy.agent import _resolve_backend
|
||||||
|
|
||||||
assert _resolve_backend("airllm") == "airllm"
|
|
||||||
assert _resolve_backend("ollama") == "ollama"
|
assert _resolve_backend("ollama") == "ollama"
|
||||||
|
|
||||||
|
|
||||||
@@ -152,39 +123,6 @@ def test_resolve_backend_defaults_to_ollama_without_config():
|
|||||||
assert _resolve_backend(None) == "ollama"
|
assert _resolve_backend(None) == "ollama"
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_backend_auto_uses_airllm_on_apple_silicon():
|
|
||||||
"""'auto' on Apple Silicon with airllm stubbed → 'airllm'."""
|
|
||||||
with (
|
|
||||||
patch("timmy.backends.is_apple_silicon", return_value=True),
|
|
||||||
patch("timmy.agent.settings") as mock_settings,
|
|
||||||
):
|
|
||||||
mock_settings.timmy_model_backend = "auto"
|
|
||||||
mock_settings.airllm_model_size = "70b"
|
|
||||||
mock_settings.ollama_model = "llama3.2"
|
|
||||||
|
|
||||||
from timmy.agent import _resolve_backend
|
|
||||||
|
|
||||||
assert _resolve_backend(None) == "airllm"
|
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_backend_auto_falls_back_on_non_apple():
|
|
||||||
"""'auto' on non-Apple Silicon → 'ollama'."""
|
|
||||||
with (
|
|
||||||
patch("timmy.backends.is_apple_silicon", return_value=False),
|
|
||||||
patch("timmy.agent.settings") as mock_settings,
|
|
||||||
):
|
|
||||||
mock_settings.timmy_model_backend = "auto"
|
|
||||||
mock_settings.airllm_model_size = "70b"
|
|
||||||
mock_settings.ollama_model = "llama3.2"
|
|
||||||
|
|
||||||
from timmy.agent import _resolve_backend
|
|
||||||
|
|
||||||
assert _resolve_backend(None) == "ollama"
|
|
||||||
|
|
||||||
|
|
||||||
# ── _model_supports_tools ────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_model_supports_tools_llama32_returns_false():
|
def test_model_supports_tools_llama32_returns_false():
|
||||||
"""llama3.2 (3B) is too small for reliable tool calling."""
|
"""llama3.2 (3B) is too small for reliable tool calling."""
|
||||||
from timmy.agent import _model_supports_tools
|
from timmy.agent import _model_supports_tools
|
||||||
@@ -259,7 +197,6 @@ def test_create_timmy_includes_tools_for_large_model():
|
|||||||
mock_settings.ollama_url = "http://localhost:11434"
|
mock_settings.ollama_url = "http://localhost:11434"
|
||||||
mock_settings.ollama_num_ctx = 4096
|
mock_settings.ollama_num_ctx = 4096
|
||||||
mock_settings.timmy_model_backend = "ollama"
|
mock_settings.timmy_model_backend = "ollama"
|
||||||
mock_settings.airllm_model_size = "70b"
|
|
||||||
mock_settings.telemetry_enabled = False
|
mock_settings.telemetry_enabled = False
|
||||||
|
|
||||||
from timmy.agent import create_timmy
|
from timmy.agent import create_timmy
|
||||||
|
|||||||
@@ -1,10 +1,7 @@
|
|||||||
"""Tests for src/timmy/backends.py — AirLLM wrapper and helpers."""
|
"""Tests for src/timmy/backends.py — backend helpers and classes."""
|
||||||
|
|
||||||
import sys
|
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
# ── is_apple_silicon ──────────────────────────────────────────────────────────
|
# ── is_apple_silicon ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -38,183 +35,6 @@ def test_is_apple_silicon_false_on_intel_mac():
|
|||||||
assert is_apple_silicon() is False
|
assert is_apple_silicon() is False
|
||||||
|
|
||||||
|
|
||||||
# ── airllm_available ─────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_available_true_when_stub_in_sys_modules():
|
|
||||||
# conftest already stubs 'airllm' — importable → True.
|
|
||||||
from timmy.backends import airllm_available
|
|
||||||
|
|
||||||
assert airllm_available() is True
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_available_false_when_not_importable():
|
|
||||||
# Temporarily remove the stub to simulate airllm not installed.
|
|
||||||
saved = sys.modules.pop("airllm", None)
|
|
||||||
try:
|
|
||||||
from timmy.backends import airllm_available
|
|
||||||
|
|
||||||
assert airllm_available() is False
|
|
||||||
finally:
|
|
||||||
if saved is not None:
|
|
||||||
sys.modules["airllm"] = saved
|
|
||||||
|
|
||||||
|
|
||||||
# ── TimmyAirLLMAgent construction ────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_agent_raises_on_unknown_size():
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="Unknown model size"):
|
|
||||||
TimmyAirLLMAgent(model_size="3b")
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_agent_uses_automodel_on_non_apple():
|
|
||||||
"""Non-Apple-Silicon path uses AutoModel.from_pretrained."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
TimmyAirLLMAgent(model_size="8b")
|
|
||||||
# sys.modules["airllm"] is a MagicMock; AutoModel.from_pretrained was called.
|
|
||||||
assert sys.modules["airllm"].AutoModel.from_pretrained.called
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_agent_uses_mlx_on_apple_silicon():
|
|
||||||
"""Apple Silicon path uses AirLLMMLX, not AutoModel."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=True):
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
TimmyAirLLMAgent(model_size="8b")
|
|
||||||
assert sys.modules["airllm"].AirLLMMLX.called
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_agent_resolves_correct_model_id_for_70b():
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import _AIRLLM_MODELS, TimmyAirLLMAgent
|
|
||||||
|
|
||||||
TimmyAirLLMAgent(model_size="70b")
|
|
||||||
sys.modules["airllm"].AutoModel.from_pretrained.assert_called_with(_AIRLLM_MODELS["70b"])
|
|
||||||
|
|
||||||
|
|
||||||
# ── TimmyAirLLMAgent.print_response ──────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def _make_agent(model_size: str = "8b") -> "TimmyAirLLMAgent": # noqa: F821
|
|
||||||
"""Helper: create an agent with a fully mocked underlying model."""
|
|
||||||
with patch("timmy.backends.is_apple_silicon", return_value=False):
|
|
||||||
from timmy.backends import TimmyAirLLMAgent
|
|
||||||
|
|
||||||
agent = TimmyAirLLMAgent(model_size=model_size)
|
|
||||||
|
|
||||||
# Replace the underlying model with a clean mock that returns predictable output.
|
|
||||||
mock_model = MagicMock()
|
|
||||||
mock_tokenizer = MagicMock()
|
|
||||||
# tokenizer() returns a dict-like object with an "input_ids" tensor mock.
|
|
||||||
input_ids_mock = MagicMock()
|
|
||||||
input_ids_mock.shape = [1, 10] # shape[1] = prompt token count = 10
|
|
||||||
token_dict = {"input_ids": input_ids_mock}
|
|
||||||
mock_tokenizer.return_value = token_dict
|
|
||||||
# generate() returns a list of token sequences.
|
|
||||||
mock_tokenizer.decode.return_value = "Sir, affirmative."
|
|
||||||
mock_model.tokenizer = mock_tokenizer
|
|
||||||
mock_model.generate.return_value = [list(range(15))] # 15 tokens total
|
|
||||||
agent._model = mock_model
|
|
||||||
return agent
|
|
||||||
|
|
||||||
|
|
||||||
def test_print_response_calls_generate():
    """``print_response`` must invoke the model's ``generate()`` exactly once."""
    agent = _make_agent()
    agent.print_response("What is sovereignty?", stream=True)
    agent._model.generate.assert_called_once()
|
|
||||||
|
|
||||||
|
|
||||||
def test_print_response_decodes_only_generated_tokens():
    """Only the tokens after the prompt are passed to ``tokenizer.decode``."""
    agent = _make_agent()
    agent.print_response("Hello", stream=False)

    # decode should be called with tokens starting at index 10 (prompt length).
    decode_call = agent._model.tokenizer.decode.call_args
    token_slice = decode_call[0][0]  # first positional argument of the call
    assert list(token_slice) == list(range(10, 15))
|
|
||||||
|
|
||||||
|
|
||||||
def test_print_response_updates_history():
    """After one exchange, ``_history`` holds the user message and a ``Timmy:`` turn."""
    agent = _make_agent()
    agent.print_response("First message")

    assert any("First message" in turn for turn in agent._history)
    assert any("Timmy:" in turn for turn in agent._history)
|
|
||||||
|
|
||||||
|
|
||||||
def test_print_response_history_included_in_second_prompt():
    """The prompt built for a follow-up message includes the earlier message."""
    agent = _make_agent()
    agent.print_response("First")

    # Build the prompt for the second call — history should appear.
    prompt = agent._build_prompt("Second")
    assert "First" in prompt
    assert "Second" in prompt
|
|
||||||
|
|
||||||
|
|
||||||
def test_print_response_stream_flag_accepted():
    """stream=False should not raise — it's accepted for API compatibility."""
    agent = _make_agent()
    # The test passes as long as the call below completes without raising.
    agent.print_response("hello", stream=False)  # no error
|
|
||||||
|
|
||||||
|
|
||||||
# ── Prompt formatting tests ────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_prompt_contains_formatted_model_name():
    """AirLLM prompt should have actual model name, not literal {model_name}."""
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = "llama3.2:3b"
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        # Built while the settings patch is still active.
        prompt = agent._build_prompt("test message")

        # Should contain the actual model name, not the placeholder
        assert "{model_name}" not in prompt
        assert "llama3.2:3b" in prompt
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_prompt_gets_lite_tier():
    """AirLLM should get LITE tier prompt (tools_enabled=False)."""
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = "test-model"
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        # Built while the settings patch is still active.
        prompt = agent._build_prompt("test message")

        # LITE tier should NOT have TOOL USAGE section
        assert "TOOL USAGE" not in prompt
        # LITE tier should have the basic rules
        assert "Be brief by default" in prompt
|
|
||||||
|
|
||||||
|
|
||||||
def test_airllm_prompt_contains_session_id():
    """AirLLM prompt should have session_id formatted, not placeholder."""
    with (
        patch("timmy.backends.is_apple_silicon", return_value=False),
        patch("config.settings") as mock_settings,
    ):
        mock_settings.ollama_model = "test-model"
        from timmy.backends import TimmyAirLLMAgent

        agent = TimmyAirLLMAgent(model_size="8b")
        # Built while the settings patch is still active.
        prompt = agent._build_prompt("test message")

        # Should contain the session_id, not the placeholder
        assert '{session_id}"' not in prompt
        assert 'session "airllm"' in prompt
|
|
||||||
|
|
||||||
|
|
||||||
# ── ClaudeBackend ─────────────────────────────────────────────────────────
|
# ── ClaudeBackend ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -107,19 +107,7 @@ def test_chat_new_session_uses_unique_id():
|
|||||||
|
|
||||||
|
|
||||||
def test_chat_passes_backend_option():
    """chat --backend airllm must forward the backend to create_timmy."""
    # Canned run result handed back by the fake agent's run().
    mock_run_output = MagicMock()
    mock_run_output.content = "OK"
    mock_run_output.status = "COMPLETED"
    mock_run_output.active_requirements = []

    mock_timmy = MagicMock()
    mock_timmy.run.return_value = mock_run_output

    # Swap the agent factory so the call arguments can be inspected.
    with patch("timmy.cli.create_timmy", return_value=mock_timmy) as mock_create:
        runner.invoke(app, ["chat", "test", "--backend", "airllm"])

    mock_create.assert_called_once_with(backend="airllm", model_size=None, session_id="cli")
|
|
||||||
|
|
||||||
|
|
||||||
def test_chat_cleans_response():
|
def test_chat_cleans_response():
|
||||||
|
|||||||
Reference in New Issue
Block a user