Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Staging Verification Gate / verify-staging (push) Has been cancelled
CI / test (pull_request) Failing after 51s
CI / validate (pull_request) Failing after 51s
Review Approval Gate / verify-review (pull_request) Failing after 7s
93 lines
3.7 KiB
Python
93 lines
3.7 KiB
Python
"""Tests for llama_client."""
|
|
from unittest.mock import patch
|
|
from pathlib import Path
|
|
import pytest, sys
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
from bin.llama_client import LlamaClient, ChatMessage, HealthStatus
|
|
|
|
class TestChatMessage:
    """Tests for the ChatMessage value object."""

    def test_creation(self):
        """A message retains the role and content it was constructed with."""
        msg = ChatMessage("user", "Hello")
        assert msg.role == "user"
        assert msg.content == "Hello"
|
|
|
|
class TestHealthStatus:
    """Tests for the HealthStatus report object."""

    def test_healthy(self):
        """A healthy status with a loaded model reports both flags truthy."""
        status = HealthStatus(True, "http://x:11435", model_loaded=True)
        assert status.healthy
        assert status.model_loaded
|
|
|
|
class TestLlamaClient:
    """Tests for LlamaClient construction, health checks, and chat endpoints.

    HTTP transport is stubbed via ``bin.llama_client._http_get`` /
    ``_http_post``, so no network access occurs.
    """

    def test_defaults(self):
        """Default construction targets localhost:11435 with qwen2.5-7b."""
        client = LlamaClient()
        assert client.endpoint == "http://localhost:11435"
        assert client.model == "qwen2.5-7b"

    def test_custom(self):
        """Endpoint and model are taken from positional arguments."""
        client = LlamaClient("http://x:8080", "mistral")
        assert client.endpoint == "http://x:8080"
        assert client.model == "mistral"

    def test_trailing_slash(self):
        """A trailing slash on the endpoint is stripped."""
        assert LlamaClient("http://x/").endpoint == "http://x"

    def test_health_ok(self):
        """A successful GET yields a healthy status."""
        with patch("bin.llama_client._http_get") as fake_get:
            fake_get.return_value = {"status": "ok"}
            assert LlamaClient().health_check().healthy is True

    def test_health_fail(self):
        """A connection error yields unhealthy, capturing the error text."""
        with patch("bin.llama_client._http_get") as fake_get:
            fake_get.side_effect = ConnectionError("down")
            status = LlamaClient().health_check()
        assert status.healthy is False
        assert "down" in status.error

    def test_is_healthy(self):
        """is_healthy() collapses a healthy check into a plain True."""
        with patch("bin.llama_client._http_get") as fake_get:
            fake_get.return_value = {"status": "ok"}
            assert LlamaClient().is_healthy() is True

    def test_list_models(self):
        """Models are unpacked from the OpenAI-style "data" list."""
        with patch("bin.llama_client._http_get") as fake_get:
            fake_get.return_value = {"data": [{"id": "qwen"}]}
            assert len(LlamaClient().list_models()) == 1

    def test_list_models_fail(self):
        """A connection error degrades to an empty model list."""
        with patch("bin.llama_client._http_get") as fake_get:
            fake_get.side_effect = ConnectionError()
            assert LlamaClient().list_models() == []

    def test_chat(self):
        """chat() surfaces the reply text and the token usage."""
        reply = {
            "choices": [{"message": {"content": "Hi"}, "finish_reason": "stop"}],
            "usage": {"total_tokens": 10},
        }
        with patch("bin.llama_client._http_post") as fake_post:
            fake_post.return_value = reply
            result = LlamaClient().chat([ChatMessage("user", "test")])
        assert result.text == "Hi"
        assert result.tokens_used == 10

    def test_chat_params(self):
        """max_tokens and temperature are forwarded in the POST payload."""
        reply = {
            "choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}],
            "usage": {},
        }
        with patch("bin.llama_client._http_post") as fake_post:
            fake_post.return_value = reply
            LlamaClient().chat(
                [ChatMessage("user", "t")], max_tokens=100, temperature=0.3
            )
            # Payload is the second positional argument to _http_post.
            payload = fake_post.call_args[0][1]
        assert payload["max_tokens"] == 100
        assert payload["temperature"] == 0.3

    def test_simple_chat(self):
        """simple_chat() returns just the reply string."""
        reply = {
            "choices": [{"message": {"content": "Yes"}, "finish_reason": "stop"}],
            "usage": {},
        }
        with patch("bin.llama_client._http_post") as fake_post:
            fake_post.return_value = reply
            assert LlamaClient().simple_chat("test") == "Yes"

    def test_simple_chat_system(self):
        """Passing a system prompt yields two messages in the payload."""
        reply = {
            "choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}],
            "usage": {},
        }
        with patch("bin.llama_client._http_post") as fake_post:
            fake_post.return_value = reply
            LlamaClient().simple_chat("t", system="helpful")
            sent = fake_post.call_args[0][1]["messages"]
        assert len(sent) == 2

    def test_complete(self):
        """complete() reads llama.cpp-style "content"/"tokens_predicted" keys."""
        with patch("bin.llama_client._http_post") as fake_post:
            fake_post.return_value = {"content": "result", "tokens_predicted": 50}
            result = LlamaClient().complete("prompt")
        assert result.text == "result"
        assert result.tokens_used == 50

    def test_benchmark(self):
        """benchmark() aggregates latency and throughput over N iterations."""
        reply = {
            "choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}],
            "usage": {"total_tokens": 10},
        }
        with patch("bin.llama_client._http_post") as fake_post, patch(
            "bin.llama_client.time.time"
        ) as fake_time:
            fake_post.return_value = reply
            # Six monotonically increasing timestamps are consumed across the
            # two iterations (presumably three time.time() reads per call —
            # confirm against the benchmark implementation if it changes).
            fake_time.side_effect = [0.0, 0.05, 0.05, 0.1, 0.1, 0.15]
            report = LlamaClient().benchmark(iterations=2)
        assert report["iterations"] == 2
        assert report["avg_latency_ms"] > 0
        assert report["tok_per_sec"] > 0
|