# the-nexus/tests/test_llama_client.py

"""Tests for llama_client."""
from unittest.mock import patch
from pathlib import Path
import pytest, sys
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from bin.llama_client import LlamaClient, ChatMessage, HealthStatus

class TestChatMessage:
    """Checks that ChatMessage stores the values it is constructed with."""

    def test_creation(self):
        """Positional (role, content) arguments are exposed as attributes."""
        message = ChatMessage("user", "Hello")
        # Separate asserts so a failure pinpoints the offending attribute.
        assert message.role == "user"
        assert message.content == "Hello"

class TestHealthStatus:
    """Checks the flags exposed by a HealthStatus built as healthy."""

    def test_healthy(self):
        """Both `healthy` and `model_loaded` are truthy when set to True."""
        status = HealthStatus(True, "http://x:11435", model_loaded=True)
        assert status.healthy
        assert status.model_loaded

class TestLlamaClient:
    """Tests for LlamaClient with the module-level HTTP helpers patched out."""

    @staticmethod
    def _chat_response(text, usage):
        """Build the response dict handed back by the mocked `_http_post`.

        `text` becomes the single choice's message content and `usage` is
        stored under the "usage" key unchanged.
        """
        choice = {"message": {"content": text}, "finish_reason": "stop"}
        return {"choices": [choice], "usage": usage}

    def test_defaults(self):
        """A client built without arguments uses the default endpoint/model."""
        client = LlamaClient()
        assert client.endpoint == "http://localhost:11435"
        assert client.model == "qwen2.5-7b"

    def test_custom(self):
        """Positional endpoint and model arguments are stored as given."""
        client = LlamaClient("http://x:8080", "mistral")
        assert client.endpoint == "http://x:8080"
        assert client.model == "mistral"

    def test_trailing_slash(self):
        """A trailing slash on the endpoint is stripped."""
        client = LlamaClient("http://x/")
        assert client.endpoint == "http://x"

    @patch("bin.llama_client._http_get")
    def test_health_ok(self, mock_get):
        """health_check() reports healthy when the GET helper answers ok."""
        mock_get.return_value = {"status": "ok"}
        status = LlamaClient().health_check()
        assert status.healthy is True

    @patch("bin.llama_client._http_get")
    def test_health_fail(self, mock_get):
        """A ConnectionError yields an unhealthy status carrying its text."""
        mock_get.side_effect = ConnectionError("down")
        status = LlamaClient().health_check()
        assert status.healthy is False
        assert "down" in status.error

    @patch("bin.llama_client._http_get")
    def test_is_healthy(self, mock_get):
        """is_healthy() returns True when the GET helper answers ok."""
        mock_get.return_value = {"status": "ok"}
        healthy = LlamaClient().is_healthy()
        assert healthy is True

    @patch("bin.llama_client._http_get")
    def test_list_models(self, mock_get):
        """list_models() returns one entry per item under "data"."""
        mock_get.return_value = {"data": [{"id": "qwen"}]}
        models = LlamaClient().list_models()
        assert len(models) == 1

    @patch("bin.llama_client._http_get")
    def test_list_models_fail(self, mock_get):
        """list_models() returns an empty list when the GET helper raises."""
        mock_get.side_effect = ConnectionError()
        models = LlamaClient().list_models()
        assert models == []

    @patch("bin.llama_client._http_post")
    def test_chat(self, mock_post):
        """chat() exposes the reply text and the total token count."""
        mock_post.return_value = self._chat_response("Hi", {"total_tokens": 10})
        conversation = [ChatMessage("user", "test")]
        reply = LlamaClient().chat(conversation)
        assert reply.text == "Hi"
        assert reply.tokens_used == 10

    @patch("bin.llama_client._http_post")
    def test_chat_params(self, mock_post):
        """max_tokens and temperature are forwarded in the POSTed payload."""
        mock_post.return_value = self._chat_response("OK", {})
        conversation = [ChatMessage("user", "t")]
        LlamaClient().chat(conversation, max_tokens=100, temperature=0.3)
        # The payload is the second positional argument of the POST helper.
        positional_args = mock_post.call_args[0]
        payload = positional_args[1]
        assert payload["max_tokens"] == 100
        assert payload["temperature"] == 0.3

    @patch("bin.llama_client._http_post")
    def test_simple_chat(self, mock_post):
        """simple_chat() returns the reply content as a plain string."""
        mock_post.return_value = self._chat_response("Yes", {})
        answer = LlamaClient().simple_chat("test")
        assert answer == "Yes"

    @patch("bin.llama_client._http_post")
    def test_simple_chat_system(self, mock_post):
        """Passing `system` results in two messages in the POSTed payload."""
        mock_post.return_value = self._chat_response("OK", {})
        LlamaClient().simple_chat("t", system="helpful")
        positional_args = mock_post.call_args[0]
        sent_messages = positional_args[1]["messages"]
        assert len(sent_messages) == 2

    @patch("bin.llama_client._http_post")
    def test_complete(self, mock_post):
        """complete() maps "content"/"tokens_predicted" onto the result."""
        mock_post.return_value = {"content": "result", "tokens_predicted": 50}
        completion = LlamaClient().complete("prompt")
        assert completion.text == "result"
        assert completion.tokens_used == 50

    @patch("bin.llama_client.time.time")
    @patch("bin.llama_client._http_post")
    def test_benchmark(self, mock_post, mock_time):
        """benchmark() reports the iteration count and positive metrics."""
        mock_post.return_value = self._chat_response("OK", {"total_tokens": 10})
        # Scripted clock readings; the mock raises once these are exhausted.
        mock_time.side_effect = [0.0, 0.05, 0.05, 0.1, 0.1, 0.15]
        report = LlamaClient().benchmark(iterations=2)
        assert report["iterations"] == 2
        assert report["avg_latency_ms"] > 0
        assert report["tok_per_sec"] > 0