Files
the-nexus/tests/test_llama_client.py
Alexander Whitestone ad98bd5ead
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Staging Verification Gate / verify-staging (push) Has been cancelled
CI / test (pull_request) Failing after 51s
CI / validate (pull_request) Failing after 51s
Review Approval Gate / verify-review (pull_request) Failing after 7s
feat: standardize llama.cpp backend for sovereign local inference (#1123)
2026-04-14 01:52:55 +00:00

93 lines
3.7 KiB
Python

"""Tests for llama_client."""
from unittest.mock import patch
from pathlib import Path
import pytest, sys
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from bin.llama_client import LlamaClient, ChatMessage, HealthStatus
class TestChatMessage:
def test_creation(self):
m = ChatMessage("user", "Hello")
assert m.role == "user" and m.content == "Hello"
class TestHealthStatus:
def test_healthy(self):
s = HealthStatus(True, "http://x:11435", model_loaded=True)
assert s.healthy and s.model_loaded
class TestLlamaClient:
def test_defaults(self):
c = LlamaClient()
assert c.endpoint == "http://localhost:11435" and c.model == "qwen2.5-7b"
def test_custom(self):
c = LlamaClient("http://x:8080", "mistral")
assert c.endpoint == "http://x:8080" and c.model == "mistral"
def test_trailing_slash(self):
assert LlamaClient("http://x/").endpoint == "http://x"
@patch("bin.llama_client._http_get")
def test_health_ok(self, m):
m.return_value = {"status": "ok"}
assert LlamaClient().health_check().healthy is True
@patch("bin.llama_client._http_get")
def test_health_fail(self, m):
m.side_effect = ConnectionError("down")
s = LlamaClient().health_check()
assert s.healthy is False and "down" in s.error
@patch("bin.llama_client._http_get")
def test_is_healthy(self, m):
m.return_value = {"status": "ok"}
assert LlamaClient().is_healthy() is True
@patch("bin.llama_client._http_get")
def test_list_models(self, m):
m.return_value = {"data": [{"id": "qwen"}]}
assert len(LlamaClient().list_models()) == 1
@patch("bin.llama_client._http_get")
def test_list_models_fail(self, m):
m.side_effect = ConnectionError()
assert LlamaClient().list_models() == []
@patch("bin.llama_client._http_post")
def test_chat(self, m):
m.return_value = {"choices": [{"message": {"content": "Hi"}, "finish_reason": "stop"}], "usage": {"total_tokens": 10}}
r = LlamaClient().chat([ChatMessage("user", "test")])
assert r.text == "Hi" and r.tokens_used == 10
@patch("bin.llama_client._http_post")
def test_chat_params(self, m):
m.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {}}
LlamaClient().chat([ChatMessage("user", "t")], max_tokens=100, temperature=0.3)
d = m.call_args[0][1]
assert d["max_tokens"] == 100 and d["temperature"] == 0.3
@patch("bin.llama_client._http_post")
def test_simple_chat(self, m):
m.return_value = {"choices": [{"message": {"content": "Yes"}, "finish_reason": "stop"}], "usage": {}}
assert LlamaClient().simple_chat("test") == "Yes"
@patch("bin.llama_client._http_post")
def test_simple_chat_system(self, m):
m.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {}}
LlamaClient().simple_chat("t", system="helpful")
assert len(m.call_args[0][1]["messages"]) == 2
@patch("bin.llama_client._http_post")
def test_complete(self, m):
m.return_value = {"content": "result", "tokens_predicted": 50}
r = LlamaClient().complete("prompt")
assert r.text == "result" and r.tokens_used == 50
@patch("bin.llama_client.time.time")
@patch("bin.llama_client._http_post")
def test_benchmark(self, mp, mt):
mp.return_value = {"choices": [{"message": {"content": "OK"}, "finish_reason": "stop"}], "usage": {"total_tokens": 10}}
mt.side_effect = [0.0, 0.05, 0.05, 0.1, 0.1, 0.15]
r = LlamaClient().benchmark(iterations=2)
assert r["iterations"] == 2 and r["avg_latency_ms"] > 0 and r["tok_per_sec"] > 0