"""Tests for llama_client.""" from unittest.mock import patch from pathlib import Path import pytest, sys sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from bin.llama_client import LlamaClient, ChatMessage, HealthStatus class TestChatMessage: def test_creation(self): m = ChatMessage("user", "Hello") assert m.role == "user" and m.content == "Hello" class TestHealthStatus: def test_healthy(self): s = HealthStatus(True, "http://x:11435", model_loaded=True) assert s.healthy and s.model_loaded class TestLlamaClient: def test_defaults(self): c = LlamaClient() assert c.endpoint == "http://localhost:11435" and c.model == "qwen2.5-7b" def test_custom(self): c = LlamaClient("http://x:8080", "mistral") assert c.endpoint == "http://x:8080" and c.model == "mistral" def test_trailing_slash(self): assert LlamaClient("http://x/").endpoint == "http://x" @patch("bin.llama_client._http_get") def test_health_ok(self, m): m.return_value = {"status": "ok"} assert LlamaClient().health_check().healthy is True @patch("bin.llama_client._http_get") def test_health_fail(self, m): m.side_effect = ConnectionError("down") s = LlamaClient().health_check() assert s.healthy is False and "down" in s.error @patch("bin.llama_client._http_get") def test_is_healthy(self, m): m.return_value = {"status": "ok"} assert LlamaClient().is_healthy() is True @patch("bin.llama_client._http_get") def test_list_models(self, m): m.return_value = {"data": [{"id": "qwen"}]} assert len(LlamaClient().list_models()) == 1 @patch("bin.llama_client._http_get") def test_list_models_fail(self, m): m.side_effect = ConnectionError() assert LlamaClient().list_models() == [] @patch("bin.llama_client._http_post") def test_chat(self, m): m.return_value = {"choices": [{"message": {"content": "Hi"}, "finish_reason": "stop"}], "usage": {"total_tokens": 10}} r = LlamaClient().chat([ChatMessage("user", "test")]) assert r.text == "Hi" and r.tokens_used == 10 @patch("bin.llama_client._http_post") def test_chat_params(self, m): m.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {}} LlamaClient().chat([ChatMessage("user", "t")], max_tokens=100, temperature=0.3) d = m.call_args[0][1] assert d["max_tokens"] == 100 and d["temperature"] == 0.3 @patch("bin.llama_client._http_post") def test_simple_chat(self, m): m.return_value = {"choices": [{"message": {"content": "Yes"}, "finish_reason": "stop"}], "usage": {}} assert LlamaClient().simple_chat("test") == "Yes" @patch("bin.llama_client._http_post") def test_simple_chat_system(self, m): m.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {}} LlamaClient().simple_chat("t", system="helpful") assert len(m.call_args[0][1]["messages"]) == 2 @patch("bin.llama_client._http_post") def test_complete(self, m): m.return_value = {"content": "result", "tokens_predicted": 50} r = LlamaClient().complete("prompt") assert r.text == "result" and r.tokens_used == 50 @patch("bin.llama_client.time.time") @patch("bin.llama_client._http_post") def test_benchmark(self, mp, mt): mp.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {"total_tokens": 10}} mt.side_effect = [0.0, 0.05, 0.05, 0.1, 0.1, 0.15] r = LlamaClient().benchmark(iterations=2) assert r["iterations"] == 2 and r["avg_latency_ms"] > 0 and r["tok_per_sec"] > 0