Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 16s
Smoke Test / smoke (pull_request) Failing after 17s
Validate Config / YAML Lint (pull_request) Failing after 13s
Validate Config / JSON Validate (pull_request) Successful in 17s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 35s
Validate Config / Shell Script Lint (pull_request) Failing after 21s
Validate Config / Cron Syntax Check (pull_request) Successful in 4s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 6s
Validate Config / Playbook Schema Validation (pull_request) Successful in 21s
PR Checklist / pr-checklist (pull_request) Failing after 3m32s
Architecture Lint / Lint Repository (pull_request) Has been cancelled
Validate Config / Python Test Suite (pull_request) Has been cancelled
659 lines
23 KiB
Python
659 lines
23 KiB
Python
"""Tests for the Quality Gate modules.

Tests for:
- ci_automation_gate.py: linting, function length, auto-fix, counters
- task_gate.py: pre/post task gate logic, lane checking, filter tags
- quality_gate.py: Bloom filter, hash dedup store, cross-run dedup (#628)

Refs: #629
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
# Add scripts/ to path
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
|
|
|
|
from ci_automation_gate import QualityGate
|
|
|
|
|
|
# ===========================================================================
|
|
# CI AUTOMATION GATE TESTS
|
|
# ===========================================================================
|
|
|
|
# -- helpers ---------------------------------------------------------------
|
|
|
|
def _write_file(dirpath, relpath, content):
    """Create ``relpath`` under ``dirpath`` holding ``content``; return its Path.

    Missing parent directories are created first, so ``relpath`` may be
    nested (for example ``"sub/nested.ts"``).  An existing file at the same
    location is overwritten.

    Args:
        dirpath: Directory that acts as the root for ``relpath``.
        relpath: File path relative to ``dirpath``.
        content: Text to store in the file.

    Returns:
        The ``Path`` of the file that was written.
    """
    target = Path(dirpath) / relpath
    target.parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so fixtures do not depend on the platform locale.
    target.write_text(content, encoding="utf-8")
    return target
|
|
|
|
|
|
def _run_gate_on_file(dirpath, relpath, content, fix=False):
    """Materialise ``content`` at ``dirpath/relpath`` and check it with a new gate.

    A fresh ``QualityGate`` is built with the given ``fix`` flag and asked to
    check only that one file.  The gate is returned so the caller can
    inspect it afterwards.
    """
    target = _write_file(dirpath, relpath, content)
    checker = QualityGate(fix=fix)
    checker.check_file(target)
    return checker
|
|
|
|
|
|
# -- trailing whitespace ---------------------------------------------------
|
|
|
|
def test_trailing_whitespace_warns():
    """A line that ends in whitespace must bump the gate's warning count."""
    source = "x = 1 \ny = 2\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.py", source)
        assert checked.warnings >= 1, "Expected warning for trailing whitespace"
|
|
|
|
|
|
def test_trailing_whitespace_fixes():
    """In fix mode the checked file is rewritten without trailing whitespace."""
    with tempfile.TemporaryDirectory() as workdir:
        # The helper writes the file, builds QualityGate(fix=True) and checks it.
        _run_gate_on_file(workdir, "test.py", "x = 1 \ny = 2\n", fix=True)
        rewritten = (Path(workdir) / "test.py").read_text()
        assert "x = 1 \n" not in rewritten, "Trailing whitespace should be removed"
        assert rewritten == "x = 1\ny = 2\n"
|
|
|
|
|
|
def test_clean_file_no_warnings():
    """A file with nothing to flag leaves both gate counters at zero."""
    tidy_source = "x = 1\ny = 2\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.py", tidy_source)
        assert checked.warnings == 0
        assert checked.failures == 0
|
|
|
|
|
|
# -- missing final newline -------------------------------------------------
|
|
|
|
def test_missing_final_newline_warns():
    """Content that does not end in a newline must produce a warning."""
    unterminated = "x = 1"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.py", unterminated)
        assert checked.warnings >= 1, "Expected warning for missing final newline"
|
|
|
|
|
|
def test_missing_final_newline_fixed():
    """In fix mode a file lacking a final newline ends with one afterwards."""
    with tempfile.TemporaryDirectory() as workdir:
        # The helper writes the file, builds QualityGate(fix=True) and checks it.
        _run_gate_on_file(workdir, "test.py", "x = 1", fix=True)
        rewritten = (Path(workdir) / "test.py").read_text()
        assert rewritten.endswith("\n"), "Fixed file should end with newline"
|
|
|
|
|
|
# -- function length (JS/TS) -----------------------------------------------
|
|
|
|
def test_short_function_passes():
    """A three-line JS function triggers neither a failure nor a warning."""
    snippet = "\n".join(["function hello() {", " return 1;", "}"]) + "\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.js", snippet)
        assert checked.failures == 0
        assert checked.warnings == 0
|
|
|
|
|
|
def test_medium_function_warns():
    """A JS function with a 22-statement body (over 20 lines) must warn."""
    statements = [f" console.log({n});" for n in range(22)]
    snippet = "\n".join(["function big() {", *statements, "}"]) + "\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.js", snippet)
        assert checked.warnings >= 1, "Expected warning for function over 20 lines"
|
|
|
|
|
|
def test_long_function_fails():
    """A JS function with a 52-statement body (over 50 lines) must fail."""
    statements = [f" console.log({n});" for n in range(52)]
    snippet = "\n".join(["function huge() {", *statements, "}"]) + "\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.js", snippet)
        assert checked.failures >= 1, "Expected failure for function over 50 lines"
|
|
|
|
|
|
def test_python_function_length_not_checked():
    """A 60-line Python ``def`` must not count as a function-length failure."""
    statements = [f" print({n})" for n in range(60)]
    snippet = "\n".join(["def huge():", *statements]) + "\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.py", snippet)
        assert checked.failures == 0, "Python functions should not trigger JS length check"
|
|
|
|
|
|
# -- file type filtering ---------------------------------------------------
|
|
|
|
def test_non_code_file_skipped():
    """A Markdown file with trailing whitespace is ignored by the gate."""
    markdown = "# Title \ntrailing ws\n"
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "README.md", markdown)
        assert checked.warnings == 0, "Markdown files should be skipped"
        assert checked.failures == 0
|
|
|
|
|
|
def test_typescript_checked():
    """A ``.ts`` file with trailing whitespace is checked and warned about."""
    with tempfile.TemporaryDirectory() as workdir:
        checked = _run_gate_on_file(workdir, "test.ts", "x = 1 \n")
        assert checked.warnings >= 1, "TypeScript files should be checked"
|
|
|
|
|
|
# -- directory traversal ---------------------------------------------------
|
|
|
|
def test_run_scans_directory():
    """``QualityGate.run`` picks up dirty files at the top level and in a subdirectory."""
    # relpath -> content; two of the three files carry trailing whitespace.
    fixtures = {
        "clean.py": "x = 1\n",
        "dirty.js": "x = 1 \n",
        "sub/nested.ts": "y = 2 \n",
    }
    with tempfile.TemporaryDirectory() as workdir:
        for relpath, text in fixtures.items():
            _write_file(workdir, relpath, text)
        scanner = QualityGate()
        scanner.run(workdir)
        assert scanner.warnings >= 2, "Should find trailing whitespace in both dirty files"
|
|
|
|
|
|
def test_run_skips_node_modules():
    """A dirty file below ``node_modules`` must not add warnings during ``run``."""
    fixtures = (
        ("clean.py", "x = 1\n"),
        ("node_modules/pkg/index.js", "x = 1 \n"),
    )
    with tempfile.TemporaryDirectory() as workdir:
        for relpath, text in fixtures:
            _write_file(workdir, relpath, text)
        scanner = QualityGate()
        scanner.run(workdir)
        assert scanner.warnings == 0, "node_modules should be skipped"
|
|
|
|
|
|
def test_run_skips_git_dir():
    """A dirty file below ``.git`` must not add warnings during ``run``."""
    fixtures = (
        ("clean.py", "x = 1\n"),
        (".git/hooks/pre-commit", "x = 1 \n"),
    )
    with tempfile.TemporaryDirectory() as workdir:
        for relpath, text in fixtures:
            _write_file(workdir, relpath, text)
        scanner = QualityGate()
        scanner.run(workdir)
        assert scanner.warnings == 0, ".git should be skipped"
|
|
|
|
|
|
# -- exit code -------------------------------------------------------------
|
|
|
|
def test_failures_cause_exit_code_1():
    """Running the gate script on a directory with a failing file exits with 1."""
    import subprocess

    script = Path(__file__).resolve().parent.parent / "scripts" / "ci_automation_gate.py"
    statements = "\n".join(f" console.log({n});" for n in range(52))
    oversized = "function f() {\n" + statements + "\n}\n"

    with tempfile.TemporaryDirectory() as workdir:
        _write_file(workdir, "huge.js", oversized)
        # Run the script as a separate process, the way CI would invoke it.
        command = [sys.executable, str(script), workdir]
        proc = subprocess.run(command, capture_output=True, text=True)
        assert proc.returncode == 1, f"Expected exit 1, got {proc.returncode}"
|
|
|
|
|
|
def test_clean_directory_exits_0():
    """Running the gate script on a directory with only a clean file exits with 0."""
    import subprocess

    script = Path(__file__).resolve().parent.parent / "scripts" / "ci_automation_gate.py"

    with tempfile.TemporaryDirectory() as workdir:
        _write_file(workdir, "clean.py", "x = 1\ny = 2\n")
        # Run the script as a separate process, the way CI would invoke it.
        command = [sys.executable, str(script), workdir]
        proc = subprocess.run(command, capture_output=True, text=True)
        assert proc.returncode == 0, f"Expected exit 0, got {proc.returncode}"
|
|
|
|
|
|
# ===========================================================================
|
|
# TASK GATE TESTS
|
|
# ===========================================================================
|
|
|
|
# Import task_gate functions directly — test the pure logic
|
|
from task_gate import check_agent_lane, FILTER_TAGS, AGENT_USERNAMES
|
|
|
|
|
|
# -- filter tags -----------------------------------------------------------
|
|
|
|
def test_epic_tag_filtered():
    """Issues with [EPIC] tag should be filtered.

    Each entry of ``FILTER_TAGS`` is upper-cased and stripped of square
    brackets before being searched for in the upper-cased title, the same
    matching the other filter-tag tests in this file use.
    """
    title = "[EPIC] Build the thing"
    title_upper = title.upper()
    # A single any() replaces the early-return loop and its `assert False`
    # sentinel, so the outcome is one ordinary assertion.
    matched = any(
        tag.upper().replace("[", "").replace("]", "") in title_upper
        for tag in FILTER_TAGS
    )
    assert matched, "EPIC tag should be detected by FILTER_TAGS"
|
|
|
|
|
|
def test_permanent_tag_filtered():
    """A ``[DO NOT CLOSE]`` title must match at least one filter tag."""
    haystack = "[DO NOT CLOSE] Keep this open forever".upper()
    # Compare on upper-cased tags with the square brackets removed.
    bare_tags = [tag.upper().replace("[", "").replace("]", "") for tag in FILTER_TAGS]
    hit = any(bare in haystack for bare in bare_tags)
    assert hit, "[DO NOT CLOSE] should be filtered"
|
|
|
|
|
|
def test_normal_title_not_filtered():
    """An ordinary bug title must not match any filter tag."""
    haystack = "Fix the login bug in auth.py".upper()
    # Collect every tag that (bracket-stripped, upper-cased) occurs in the title.
    offending = [
        tag
        for tag in FILTER_TAGS
        if tag.upper().replace("[", "").replace("]", "") in haystack
    ]
    assert not offending, "Normal title should not be filtered"
|
|
|
|
|
|
def test_morning_report_filtered():
    """A ``[MORNING REPORT]`` title must match at least one filter tag."""
    haystack = "[MORNING REPORT] Fleet status 2026-04-13".upper()

    def _bare(tag):
        # Upper-case the tag and drop its square brackets before matching.
        return tag.upper().replace("[", "").replace("]", "")

    hit = any(_bare(tag) in haystack for tag in FILTER_TAGS)
    assert hit, "[MORNING REPORT] should be filtered"
|
|
|
|
|
|
# -- agent lane checker ----------------------------------------------------
|
|
|
|
def test_lane_check_no_config():
    """An empty lane mapping passes and the message says no config exists."""
    empty_lanes = {}
    allowed, reason = check_agent_lane("groq", "Fix bug", [], empty_lanes)
    assert allowed
    assert "No lane config" in reason
|
|
|
|
|
|
def test_lane_check_agent_not_in_config():
    """An agent missing from the lane mapping passes with a 'No lanes defined' message."""
    other_agents_only = {"ezra": ["docs"]}
    allowed, reason = check_agent_lane("groq", "Fix bug", [], other_agents_only)
    assert allowed
    assert "No lanes defined" in reason
|
|
|
|
|
|
def test_lane_check_agent_in_config():
    """For a configured agent the message names the agent and one of its lanes."""
    allowed, reason = check_agent_lane(
        "groq", "Fix bug", [], {"groq": ["code", "infra"]}
    )
    assert allowed
    for expected in ("groq", "code"):
        assert expected in reason
|
|
|
|
|
|
# -- agent usernames -------------------------------------------------------
|
|
|
|
def test_known_agents_in_usernames():
    """Each of the five core agent names is present in ``AGENT_USERNAMES``."""
    core_agents = ("groq", "ezra", "bezalel", "timmy", "codex-agent")
    for agent in core_agents:
        assert agent in AGENT_USERNAMES
|
|
|
|
|
|
# -- pre-task gate (mocked API) -------------------------------------------
|
|
|
|
def test_pre_task_gate_issue_not_found():
    """When every API lookup returns ``None`` the pre-task gate reports 'not found'."""
    from task_gate import pre_task_gate

    # gitea_get is stubbed to return None for every path.
    with patch("task_gate.gitea_get", return_value=None):
        outcome = pre_task_gate("timmy-config", 99999, "groq")

    passed, msgs = outcome
    assert not passed
    assert any("not found" in text for text in msgs)
|
|
|
|
|
|
def test_pre_task_gate_filter_tag_blocks():
    """An issue titled with ``[EPIC]`` must fail the pre-task gate with a filter message."""
    from task_gate import pre_task_gate

    epic_issue = {"title": "[EPIC] Big thing", "assignees": [], "labels": []}
    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # ``list`` yields a fresh empty list on every call.
    routes = (
        ("issues/100", lambda: epic_issue),
        ("branches", list),
        ("pulls", list),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = pre_task_gate("timmy-config", 100, "groq")

    assert not passed
    assert any("filter" in text.lower() for text in msgs)
|
|
|
|
|
|
def test_pre_task_gate_assigned_agent_blocks():
    """An issue assigned to ``ezra`` must fail the gate when ``groq`` asks for it."""
    from task_gate import pre_task_gate

    taken_issue = {
        "title": "Fix bug",
        "assignees": [{"login": "ezra"}],
        "labels": [],
    }
    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # ``list`` yields a fresh empty list on every call.
    routes = (
        ("issues/100", lambda: taken_issue),
        ("branches", list),
        ("pulls", list),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = pre_task_gate("timmy-config", 100, "groq")

    assert not passed
    assert any("Already assigned" in text for text in msgs)
|
|
|
|
|
|
def test_pre_task_gate_existing_pr_blocks():
    """An issue already referenced by a listed PR must fail the pre-task gate."""
    from task_gate import pre_task_gate

    free_issue = {"title": "Fix bug", "assignees": [], "labels": []}
    open_prs = [{"number": 50, "title": "Fix for #100", "body": "Closes #100"}]
    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # ``list`` yields a fresh empty list on every call.
    routes = (
        ("issues/100", lambda: free_issue),
        ("branches", list),
        ("pulls", lambda: open_prs),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = pre_task_gate("timmy-config", 100, "groq")

    assert not passed
    assert any("Open PR" in text for text in msgs)
|
|
|
|
|
|
def test_pre_task_gate_clean_passes():
    """An unassigned, untagged issue with no branches or PRs passes the gate."""
    from task_gate import pre_task_gate

    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # each factory builds a fresh payload on every call.
    routes = (
        ("issues/100", lambda: {"title": "Fix bug", "assignees": [], "labels": []}),
        ("branches", list),
        ("pulls", list),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = pre_task_gate("timmy-config", 100, "groq")

    assert passed
|
|
|
|
|
|
# -- post-task gate (mocked API) ------------------------------------------
|
|
|
|
def test_post_task_gate_missing_branch():
    """When every API lookup returns ``None`` the post-task gate reports a missing branch."""
    from task_gate import post_task_gate

    # gitea_get is stubbed to return None for every path.
    with patch("task_gate.gitea_get", return_value=None):
        outcome = post_task_gate("timmy-config", 100, "groq", "groq/fix-100")

    passed, msgs = outcome
    assert not passed
    assert any("does not exist" in text for text in msgs)
|
|
|
|
|
|
def test_post_task_gate_no_agent_prefix_warns():
    """A branch named without the agent prefix still passes but yields a naming message."""
    from task_gate import post_task_gate

    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # each factory builds a fresh payload on every call.
    routes = (
        ("branches/fix-100", lambda: {"name": "fix-100"}),
        ("compare", lambda: {"commits": [{"id": "abc"}], "diff_files": ["file.py"]}),
        ("pulls", list),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = post_task_gate("timmy-config", 100, "groq", "fix-100")

    # The naming problem is expected to surface as a message, not as a failure.
    assert passed
    assert any(
        "doesn't start with agent" in text or "convention" in text for text in msgs
    )
|
|
|
|
|
|
def test_post_task_gate_no_commits_fails():
    """A branch whose compare result lists no commits must fail the post-task gate."""
    from task_gate import post_task_gate

    # Ordered (path fragment, payload factory) pairs; the first match wins and
    # each factory builds a fresh payload on every call.
    routes = (
        ("branches/", lambda: {"name": "groq/fix-100"}),
        ("compare", lambda: {"commits": [], "diff_files": []}),
        ("pulls", list),
    )

    def fake_gitea_get(path):
        for fragment, make_payload in routes:
            if fragment in path:
                return make_payload()
        return None

    with patch("task_gate.gitea_get", side_effect=fake_gitea_get):
        passed, msgs = post_task_gate("timmy-config", 100, "groq", "groq/fix-100")

    assert not passed
    assert any("no commits" in text.lower() for text in msgs)
|
|
|
|
|
|
# ===========================================================================
|
|
# INTEGRATION: gate on real script files
|
|
# ===========================================================================
|
|
|
|
def test_ci_gate_on_actual_task_gate():
    """``scripts/task_gate.py`` itself must produce no gate failures.

    The test is a no-op when that file is not present on disk.
    """
    target = Path(__file__).resolve().parent.parent / "scripts" / "task_gate.py"
    if not target.exists():
        return
    checker = QualityGate()
    checker.check_file(target)
    assert checker.failures == 0, f"task_gate.py should pass quality gate, got {checker.failures} failures"
|
|
|
|
|
|
def test_ci_gate_on_actual_ci_automation_gate():
    """``scripts/ci_automation_gate.py`` itself must produce no gate failures.

    The test is a no-op when that file is not present on disk.
    """
    target = Path(__file__).resolve().parent.parent / "scripts" / "ci_automation_gate.py"
    if not target.exists():
        return
    checker = QualityGate()
    checker.check_file(target)
    assert checker.failures == 0, f"ci_automation_gate.py should pass quality gate, got {checker.failures} failures"
|
|
|
|
|
|
# ===========================================================================
|
|
# BLOOM FILTER + HASH DEDUP TESTS (Issue #628)
|
|
# ===========================================================================
|
|
|
|
import sys, os
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "pipeline"))
|
|
from quality_gate import BloomFilter, HashDedupStore, HASH_DIR, entry_hash
|
|
|
|
|
|
class TestBloomFilter:
    """Membership and serialisation checks for ``BloomFilter``."""

    def test_empty_bloom_no_contains(self):
        """A freshly built filter reports an item as absent."""
        empty = BloomFilter(capacity=100)
        assert "hello" not in empty

    def test_add_then_contains(self):
        """An item is reported present once it has been added."""
        bloom = BloomFilter(capacity=100)
        bloom.add("hello")
        assert "hello" in bloom

    def test_false_negatives_impossible(self):
        """Each of 500 added items must be found again."""
        bloom = BloomFilter(capacity=1000)
        inserted = [f"item-{n}" for n in range(500)]
        for entry in inserted:
            bloom.add(entry)
        for entry in inserted:
            assert entry in bloom, f"False negative for {entry}"

    def test_false_positive_rate(self):
        """The measured false-positive rate stays below 5%.

        The filter is built with ``error_rate=0.01`` and given 1000 items;
        the asserted bound (5%) is looser than that setting.
        """
        bloom = BloomFilter(capacity=1000, error_rate=0.01)
        members = {f"added-{n}" for n in range(1000)}
        for member in members:
            bloom.add(member)

        check_count = 10000
        candidates = (f"not-added-{n}" for n in range(check_count))
        # Count probes that were never added yet are reported as present.
        false_positives = sum(
            1
            for candidate in candidates
            if candidate not in members and candidate in bloom
        )
        fp_rate = false_positives / check_count
        assert fp_rate < 0.05, f"FP rate {fp_rate:.3%} too high (expected <5%)"

    def test_serialization_roundtrip(self):
        """A filter rebuilt from ``to_dict`` output keeps its members."""
        source = BloomFilter(capacity=100)
        for word in ("alpha", "beta"):
            source.add(word)

        clone = BloomFilter.from_dict(source.to_dict())

        assert "alpha" in clone
        assert "beta" in clone
        assert "gamma" not in clone
|
|
|
|
|
|
class TestHashDedupStore:
    """Tests for ``HashDedupStore`` run against a throwaway hash directory.

    Every test redirects ``quality_gate.HASH_DIR`` to ``tmp_path / "hashes"``
    through ``_isolated_hash_dir`` instead of hand-written save/restore code.
    """

    @staticmethod
    def _isolated_hash_dir(tmp_path):
        """Return a context manager that points ``quality_gate.HASH_DIR`` at a temp dir.

        While active, ``quality_gate.HASH_DIR`` is ``tmp_path / "hashes"``.
        The previous value is put back on exit, including when the body
        raises.  ``with ... as hash_dir`` binds the temporary path.
        """
        import quality_gate as qg

        return patch.object(qg, "HASH_DIR", tmp_path / "hashes")

    def test_first_seen_not_duplicate(self, tmp_path):
        """A hash that was never added is not reported as a duplicate."""
        with self._isolated_hash_dir(tmp_path):
            store = HashDedupStore()
            assert not store.is_duplicate("abc123")

    def test_after_add_is_duplicate(self, tmp_path):
        """A hash is reported as a duplicate after ``add`` and ``flush``."""
        with self._isolated_hash_dir(tmp_path):
            store = HashDedupStore()
            store.add("abc123")
            store.flush()
            assert store.is_duplicate("abc123")

    def test_different_hash_not_duplicate(self, tmp_path):
        """Adding one hash does not make a different hash a duplicate."""
        with self._isolated_hash_dir(tmp_path):
            store = HashDedupStore()
            store.add("abc123")
            store.flush()
            assert not store.is_duplicate("xyz789")

    def test_rotation_deletes_old_files(self, tmp_path):
        """Files older than retention_days should be deleted."""
        from datetime import datetime, timezone

        with self._isolated_hash_dir(tmp_path) as hash_dir:
            # The directory is created inside the patch scope, so a failure
            # here can no longer leave HASH_DIR pointing at the temp path.
            hash_dir.mkdir(parents=True, exist_ok=True)

            # One file dated far in the past, one dated today (UTC).
            old_date = "2020-01-01"
            old_file = hash_dir / f"{old_date}.json"
            old_file.write_text('["old_hash"]')
            today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
            today_file = hash_dir / f"{today}.json"
            today_file.write_text('["new_hash"]')

            store = HashDedupStore(retention_days=7)
            store._rotate()

            assert not old_file.exists(), "Old file should be deleted"
            assert today_file.exists(), "Today's file should remain"

    def test_stats_reports_counts(self, tmp_path):
        """``stats()`` reports the five hashes added in this run."""
        with self._isolated_hash_dir(tmp_path):
            store = HashDedupStore()
            for i in range(5):
                store.add(f"hash-{i}")
            store.flush()
            stats = store.stats()
            assert stats["today_count"] == 5
            assert stats["total_hashes"] >= 5

    def test_large_scale_dedup(self, tmp_path):
        """10,000 added hashes are all reported as duplicates after a flush."""
        with self._isolated_hash_dir(tmp_path):
            store = HashDedupStore()
            hashes = [f"hash-{i:06d}" for i in range(10000)]
            for h in hashes:
                store.add(h)
            store.flush()
            # All should be duplicates now
            dupes = sum(1 for h in hashes if store.is_duplicate(h))
            assert dupes == 10000, f"Expected 10000 dupes, got {dupes}"
|
|
|
|
|
|
class TestCrossRunDedup:
    """Checks for dedup across separate stores and for ``entry_hash``."""

    def test_run_gate_rejects_cross_run_duplicate(self, tmp_path):
        """The same JSONL entry passes on a first run and is rejected on a second.

        ``quality_gate.HASH_DIR`` and ``quality_gate.STATS_FILE`` are pointed
        at ``tmp_path`` for the duration of the test and restored afterwards.
        """
        import quality_gate as qg

        record = {"prompt": "hello", "response": "world " * 20}
        jsonl_path = tmp_path / "test.jsonl"

        with patch.object(qg, "HASH_DIR", tmp_path / "hashes"), \
                patch.object(qg, "STATS_FILE", tmp_path / "stats.json"):
            jsonl_path.write_text(json.dumps(record) + "\n")

            # First run: the entry is new.
            first_report = qg.run_gate(str(jsonl_path), "training_pairs", HashDedupStore())
            assert first_report.passed == 1
            assert first_report.rejected == 0

            # Second run uses a brand-new store object (stands in for a restart).
            second_report = qg.run_gate(str(jsonl_path), "training_pairs", HashDedupStore())
            assert second_report.rejected == 1, f"Expected 1 rejected, got {second_report.rejected}"

    def test_entry_hash_deterministic(self):
        """Hashing the same entry twice gives the same 16-character value."""
        sample = {"prompt": "test", "response": "data"}
        first, second = entry_hash(sample), entry_hash(sample)
        assert first == second
        assert len(first) == 16

    def test_entry_hash_differs_for_different_entries(self):
        """Entries that differ in a value hash differently."""
        assert entry_hash({"a": 1}) != entry_hash({"a": 2})
|