Compare commits
5 Commits
burn/81-17
...
fix/48-mar
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cd18bd06be | ||
| 492c1cdcfd | |||
| 6e583310a8 | |||
| d2edbdadc2 | |||
| c009d8df77 |
@@ -22,3 +22,6 @@ jobs:
|
||||
run: |
|
||||
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v .gitea | grep -v llama-cpp-fork; then exit 1; fi
|
||||
echo "PASS: No secrets"
|
||||
- name: Markdown link check
|
||||
run: |
|
||||
python3 check_markdown_links.py
|
||||
|
||||
124
check_markdown_links.py
Normal file
124
check_markdown_links.py
Normal file
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Check local markdown links.
|
||||
|
||||
Scans markdown files for local links and fails on broken targets.
|
||||
Ignores:
|
||||
- external URLs (http/https)
|
||||
- anchors (#section)
|
||||
- mailto: and tel:
|
||||
- links inside fenced code blocks
|
||||
- generated/build directories
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
CODE_FENCE_RE = re.compile(r"^```")
|
||||
LINK_RE = re.compile(r"(?<!!)\[[^\]]+\]\(([^)]+)\)")
|
||||
DEFAULT_SKIP_DIRS = {
|
||||
".git",
|
||||
".gitea",
|
||||
".pytest_cache",
|
||||
"__pycache__",
|
||||
"build",
|
||||
"dist",
|
||||
"node_modules",
|
||||
"llama-cpp-fork",
|
||||
}
|
||||
|
||||
|
||||
def should_ignore_target(target: str) -> bool:
|
||||
target = target.strip()
|
||||
return (
|
||||
not target
|
||||
or target.startswith("http://")
|
||||
or target.startswith("https://")
|
||||
or target.startswith("mailto:")
|
||||
or target.startswith("tel:")
|
||||
or target.startswith("#")
|
||||
)
|
||||
|
||||
|
||||
def normalize_target(target: str) -> str:
|
||||
target = target.strip()
|
||||
if target.startswith("<") and target.endswith(">"):
|
||||
target = target[1:-1].strip()
|
||||
if "#" in target:
|
||||
target = target.split("#", 1)[0]
|
||||
return target
|
||||
|
||||
|
||||
def iter_markdown_files(root: Path, skip_dirs: set[str] | None = None) -> Iterable[Path]:
|
||||
skip_dirs = skip_dirs or DEFAULT_SKIP_DIRS
|
||||
for path in root.rglob("*.md"):
|
||||
if any(part in skip_dirs for part in path.relative_to(root).parts):
|
||||
continue
|
||||
yield path
|
||||
|
||||
|
||||
def iter_links(path: Path) -> Iterable[tuple[int, str]]:
|
||||
in_code_fence = False
|
||||
for line_no, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
|
||||
if CODE_FENCE_RE.match(line.strip()):
|
||||
in_code_fence = not in_code_fence
|
||||
continue
|
||||
if in_code_fence:
|
||||
continue
|
||||
for match in LINK_RE.finditer(line):
|
||||
yield line_no, match.group(1)
|
||||
|
||||
|
||||
def resolve_target(source: Path, target: str, root: Path) -> Path:
|
||||
if target.startswith("/"):
|
||||
return (root / target.lstrip("/")).resolve()
|
||||
return (source.parent / target).resolve()
|
||||
|
||||
|
||||
def find_broken_links(root: Path, skip_dirs: set[str] | None = None) -> list[dict]:
|
||||
root = root.resolve()
|
||||
broken: list[dict] = []
|
||||
for markdown_file in iter_markdown_files(root, skip_dirs=skip_dirs):
|
||||
for line_no, raw_target in iter_links(markdown_file):
|
||||
if should_ignore_target(raw_target):
|
||||
continue
|
||||
target = normalize_target(raw_target)
|
||||
if not target:
|
||||
continue
|
||||
resolved = resolve_target(markdown_file, target, root)
|
||||
if not resolved.exists():
|
||||
broken.append(
|
||||
{
|
||||
"source": str(markdown_file),
|
||||
"line": line_no,
|
||||
"target": target,
|
||||
"resolved": str(resolved),
|
||||
}
|
||||
)
|
||||
return broken
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Fail on broken local markdown links.")
|
||||
parser.add_argument("root", nargs="?", default=".", help="Repo root to scan (default: .)")
|
||||
args = parser.parse_args()
|
||||
|
||||
root = Path(args.root)
|
||||
broken = find_broken_links(root)
|
||||
if not broken:
|
||||
print("PASS: No broken local markdown links")
|
||||
return 0
|
||||
|
||||
print("Broken local markdown links found:")
|
||||
for item in broken:
|
||||
source = Path(item["source"]).relative_to(root.resolve())
|
||||
print(f"{source}:{item['line']}: missing target -> {item['target']}")
|
||||
return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
3
tests/conftest.py
Normal file
3
tests/conftest.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Pytest configuration for turboquant."""
|
||||
import sys, os
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
74
tests/test_markdown_link_check.py
Normal file
74
tests/test_markdown_link_check.py
Normal file
@@ -0,0 +1,74 @@
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
from check_markdown_links import find_broken_links
|
||||
|
||||
|
||||
def write(path: Path, content: str) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(textwrap.dedent(content).lstrip(), encoding="utf-8")
|
||||
|
||||
|
||||
def test_reports_missing_local_markdown_target_with_line_number(tmp_path: Path):
|
||||
write(
|
||||
tmp_path / "README.md",
|
||||
"""
|
||||
# Repo
|
||||
|
||||
See [status](docs/status.md).
|
||||
""",
|
||||
)
|
||||
|
||||
broken = find_broken_links(tmp_path)
|
||||
|
||||
assert len(broken) == 1
|
||||
assert broken[0]["source"].endswith("README.md")
|
||||
assert broken[0]["line"] == 3
|
||||
assert broken[0]["target"] == "docs/status.md"
|
||||
|
||||
|
||||
def test_allows_existing_relative_targets(tmp_path: Path):
|
||||
write(tmp_path / "docs" / "status.md", "# Status\n")
|
||||
write(
|
||||
tmp_path / "README.md",
|
||||
"""
|
||||
# Repo
|
||||
|
||||
See [status](docs/status.md).
|
||||
""",
|
||||
)
|
||||
|
||||
assert find_broken_links(tmp_path) == []
|
||||
|
||||
|
||||
def test_ignores_external_anchor_mailto_and_tel_links(tmp_path: Path):
|
||||
write(
|
||||
tmp_path / "README.md",
|
||||
"""
|
||||
[external](https://example.com)
|
||||
[anchor](#section)
|
||||
[mail](mailto:test@example.com)
|
||||
[call](tel:988)
|
||||
""",
|
||||
)
|
||||
|
||||
assert find_broken_links(tmp_path) == []
|
||||
|
||||
|
||||
def test_ignores_links_inside_fenced_code_blocks(tmp_path: Path):
|
||||
write(
|
||||
tmp_path / "README.md",
|
||||
"""
|
||||
```md
|
||||
[broken](docs/missing.md)
|
||||
```
|
||||
""",
|
||||
)
|
||||
|
||||
assert find_broken_links(tmp_path) == []
|
||||
|
||||
|
||||
def test_skips_build_directories(tmp_path: Path):
|
||||
write(tmp_path / "build" / "README.md", "[broken](missing.md)\n")
|
||||
|
||||
assert find_broken_links(tmp_path) == []
|
||||
338
tests/test_tool_call_integration.py
Normal file
338
tests/test_tool_call_integration.py
Normal file
@@ -0,0 +1,338 @@
|
||||
"""
|
||||
Integration test: turboquant compressed model passes hermes tool calls (issue #82).
|
||||
|
||||
Validates that a TurboQuant-compressed model can:
|
||||
1. Parse hermes tool schemas correctly
|
||||
2. Format tool calls in OpenAI-compatible format
|
||||
3. Pass through the hermes agent conversation loop
|
||||
|
||||
Tests are structured as contract tests -- they validate the schema/format
|
||||
compatibility without requiring a running model server. The live inference
|
||||
test is skipped by default (requires llama-server with TurboQuant model).
|
||||
|
||||
Usage:
|
||||
pytest tests/test_tool_call_integration.py -v
|
||||
pytest tests/test_tool_call_integration.py -v -k live # run live test if server available
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
|
||||
ROOT = pathlib.Path(__file__).resolve().parents[1]
|
||||
PROFILE_PATH = ROOT / "profiles" / "hermes-profile-gemma4-turboquant.yaml"
|
||||
BENCHMARKS_DIR = ROOT / "benchmarks"
|
||||
|
||||
|
||||
class TestHermesProfileSchema(unittest.TestCase):
|
||||
"""Validate the hermes profile YAML has required fields for tool calling."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
import yaml
|
||||
cls.profile = yaml.safe_load(PROFILE_PATH.read_text())
|
||||
|
||||
def test_profile_has_providers(self):
|
||||
assert "providers" in self.profile, "Profile must define providers"
|
||||
assert "primary" in self.profile["providers"], "Must have primary provider"
|
||||
|
||||
def test_primary_provider_has_endpoint(self):
|
||||
primary = self.profile["providers"]["primary"]
|
||||
assert "endpoint" in primary, "Primary provider must have endpoint"
|
||||
assert primary["endpoint"].startswith("http"), "Endpoint must be HTTP(S) URL"
|
||||
|
||||
def test_primary_provider_has_api_path(self):
|
||||
primary = self.profile["providers"]["primary"]
|
||||
assert "api_path" in primary, "Primary provider must have api_path"
|
||||
assert "/chat/completions" in primary["api_path"], (
|
||||
"api_path should be OpenAI-compatible /chat/completions"
|
||||
)
|
||||
|
||||
def test_turboquant_settings_present(self):
|
||||
primary = self.profile["providers"]["primary"]
|
||||
assert "turboquant" in primary, "Must have turboquant config section"
|
||||
tq = primary["turboquant"]
|
||||
assert tq.get("enabled") is True, "TurboQuant must be enabled"
|
||||
assert tq.get("kv_type") in ("turbo2", "turbo3", "turbo4"), (
|
||||
"kv_type must be turbo2, turbo3, or turbo4"
|
||||
)
|
||||
|
||||
def test_context_window_configured(self):
|
||||
primary = self.profile["providers"]["primary"]
|
||||
assert "context" in primary, "Must have context config"
|
||||
ctx = primary["context"]
|
||||
assert ctx.get("max_tokens", 0) >= 8192, (
|
||||
"max_tokens should be >= 8192 for TurboQuant value proposition"
|
||||
)
|
||||
|
||||
|
||||
class TestToolSchemaCompatibility(unittest.TestCase):
|
||||
"""Verify hermes tool schemas serialize to valid JSON for OpenAI tool_calls."""
|
||||
|
||||
SAMPLE_TOOL_SCHEMAS = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"description": "Read a text file with line numbers.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": {"type": "string", "description": "File path"},
|
||||
"offset": {"type": "integer", "default": 1},
|
||||
"limit": {"type": "integer", "default": 500},
|
||||
},
|
||||
"required": ["path"],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "execute_code",
|
||||
"description": "Run a Python script.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {"type": "string", "description": "Python code"},
|
||||
},
|
||||
"required": ["code"],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"description": "Search the web.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {"type": "string"},
|
||||
"max_results": {"type": "integer", "default": 5},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
|
||||
def test_tool_schemas_serialize_to_json(self):
|
||||
"""Tool schemas must serialize without errors."""
|
||||
serialized = json.dumps(self.SAMPLE_TOOL_SCHEMAS)
|
||||
assert len(serialized) > 0
|
||||
parsed = json.loads(serialized)
|
||||
assert len(parsed) == len(self.SAMPLE_TOOL_SCHEMAS)
|
||||
|
||||
def test_tool_schemas_have_required_openai_fields(self):
|
||||
"""Each tool schema must have the fields OpenAI expects."""
|
||||
for tool in self.SAMPLE_TOOL_SCHEMAS:
|
||||
assert tool["type"] == "function", "Tool type must be 'function'"
|
||||
fn = tool["function"]
|
||||
assert "name" in fn, "Function must have name"
|
||||
assert "description" in fn, "Function must have description"
|
||||
assert "parameters" in fn, "Function must have parameters"
|
||||
params = fn["parameters"]
|
||||
assert params["type"] == "object", "Parameters type must be 'object'"
|
||||
assert "properties" in params, "Parameters must have properties"
|
||||
|
||||
def test_tool_call_response_format(self):
|
||||
"""Verify tool_call response matches OpenAI format."""
|
||||
tool_call = {
|
||||
"id": "call_abc123",
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"arguments": json.dumps({"path": "/tmp/test.txt"}),
|
||||
},
|
||||
}
|
||||
args = json.loads(tool_call["function"]["arguments"])
|
||||
assert args["path"] == "/tmp/test.txt"
|
||||
assert tool_call["function"]["name"] in [
|
||||
t["function"]["name"] for t in self.SAMPLE_TOOL_SCHEMAS
|
||||
]
|
||||
|
||||
def test_tool_names_are_valid_identifiers(self):
|
||||
"""Tool names must be valid Python identifiers for hermes dispatch."""
|
||||
for tool in self.SAMPLE_TOOL_SCHEMAS:
|
||||
name = tool["function"]["name"]
|
||||
assert re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name), (
|
||||
f"Tool name \'{name}\' is not a valid identifier"
|
||||
)
|
||||
|
||||
|
||||
class TestTurboquantServerConfig(unittest.TestCase):
|
||||
"""Validate server startup configuration matches hermes profile."""
|
||||
|
||||
def test_server_command_has_turboquant_flags(self):
|
||||
"""The server command in the profile must include -ctk/-ctv flags."""
|
||||
profile_text = PROFILE_PATH.read_text()
|
||||
assert "-ctk" in profile_text, "Profile server command must include -ctk flag"
|
||||
assert "-ctv" in profile_text, "Profile server command must include -ctv flag"
|
||||
|
||||
def test_server_command_has_context_flag(self):
|
||||
"""Server command must set context size."""
|
||||
profile_text = PROFILE_PATH.read_text()
|
||||
assert re.search(r"-c\s+\d+", profile_text), (
|
||||
"Server command must include -c <context_size> flag"
|
||||
)
|
||||
|
||||
def test_layer_adaptive_env_var(self):
|
||||
"""Profile must set TURBO_LAYER_ADAPTIVE env var."""
|
||||
profile_text = PROFILE_PATH.read_text()
|
||||
assert "TURBO_LAYER_ADAPTIVE" in profile_text, (
|
||||
"Profile must configure TURBO_LAYER_ADAPTIVE"
|
||||
)
|
||||
|
||||
|
||||
class TestBenchmarkData(unittest.TestCase):
|
||||
"""Validate benchmark test prompts include tool-call test cases."""
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
prompts_path = BENCHMARKS_DIR / "test_prompts.json"
|
||||
cls.prompts = json.loads(prompts_path.read_text())
|
||||
|
||||
def test_has_tool_call_test_prompt(self):
|
||||
"""Benchmark prompts must include a tool-call format test."""
|
||||
categories = [p.get("category") for p in self.prompts]
|
||||
assert "tool_call_format" in categories, (
|
||||
"Benchmark must include a tool_call_format test case"
|
||||
)
|
||||
|
||||
def test_tool_call_prompt_expects_json(self):
|
||||
"""Tool call test prompt must expect JSON in the response."""
|
||||
tool_prompt = next(
|
||||
p for p in self.prompts if p.get("category") == "tool_call_format"
|
||||
)
|
||||
pattern = tool_prompt.get("expected_pattern", "")
|
||||
assert "json" in pattern.lower() or "\\{" in pattern, (
|
||||
"Tool call prompt must expect JSON-formatted response"
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
not os.environ.get("TURBOQUANT_SERVER_URL"),
|
||||
reason="No TurboQuant server available (set TURBOQUANT_SERVER_URL to run)",
|
||||
)
|
||||
class TestLiveToolCallIntegration:
|
||||
"""Live integration test -- requires running llama-server with TurboQuant."""
|
||||
|
||||
def test_server_health(self):
|
||||
"""Server must respond to /v1/models endpoint."""
|
||||
import requests
|
||||
url = os.environ["TURBOQUANT_SERVER_URL"]
|
||||
resp = requests.get(f"{url}/v1/models", timeout=10)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert "data" in data
|
||||
assert len(data["data"]) > 0
|
||||
|
||||
def test_tool_call_completion(self):
|
||||
"""Model must return a valid tool_call for a read_file prompt."""
|
||||
import requests
|
||||
url = os.environ["TURBOQUANT_SERVER_URL"]
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"description": "Read a file",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"path": {"type": "string"}},
|
||||
"required": ["path"],
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
resp = requests.post(
|
||||
f"{url}/v1/chat/completions",
|
||||
json={
|
||||
"model": "gemma-4",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Read the file at /tmp/test.txt"}
|
||||
],
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
choice = data["choices"][0]
|
||||
msg = choice["message"]
|
||||
if "tool_calls" in msg and msg["tool_calls"]:
|
||||
tc = msg["tool_calls"][0]
|
||||
assert tc["type"] == "function"
|
||||
assert tc["function"]["name"] == "read_file"
|
||||
args = json.loads(tc["function"]["arguments"])
|
||||
assert "path" in args
|
||||
else:
|
||||
assert len(msg.get("content", "")) > 0
|
||||
|
||||
def test_tool_call_with_multiple_tools(self):
|
||||
"""Model must handle multiple available tools."""
|
||||
import requests
|
||||
url = os.environ["TURBOQUANT_SERVER_URL"]
|
||||
tools = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "read_file",
|
||||
"description": "Read a file",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"path": {"type": "string"}},
|
||||
"required": ["path"],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "web_search",
|
||||
"description": "Search the web",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"query": {"type": "string"}},
|
||||
"required": ["query"],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "execute_code",
|
||||
"description": "Run Python code",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"code": {"type": "string"}},
|
||||
"required": ["code"],
|
||||
},
|
||||
},
|
||||
},
|
||||
]
|
||||
resp = requests.post(
|
||||
f"{url}/v1/chat/completions",
|
||||
json={
|
||||
"model": "gemma-4",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Search the web for 'bitcoin price'"}
|
||||
],
|
||||
"tools": tools,
|
||||
"tool_choice": "auto",
|
||||
},
|
||||
timeout=120,
|
||||
)
|
||||
assert resp.status_code == 200
|
||||
data = resp.json()
|
||||
assert "choices" in data
|
||||
assert len(data["choices"]) > 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
Reference in New Issue
Block a user