diff --git a/scripts/test_harvest_prompt_comprehensive.py b/scripts/test_harvest_prompt_comprehensive.py
deleted file mode 100644
index 11d0f7b..0000000
--- a/scripts/test_harvest_prompt_comprehensive.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python3
-"""
-Comprehensive test script for knowledge extraction prompt.
-Validates prompt structure, requirements, and consistency.
-"""
-
-import json
-import re
-from pathlib import Path
-
-def check_prompt_structure():
-    """Test that the prompt has the required structure."""
-    prompt_path = Path("templates/harvest-prompt.md")
-    if not prompt_path.exists():
-        return False, "harvest-prompt.md not found"
-
-    content = prompt_path.read_text()
-
-    # Check for required sections
-    required_sections = [
-        "System Prompt",
-        "Instructions",
-        "Categories",
-        "Output Format",
-        "Confidence Scoring",
-        "Constraints",
-        "Example"
-    ]
-
-    for section in required_sections:
-        if section.lower() not in content.lower():
-            return False, f"Missing required section: {section}"
-
-    # Check for required categories
-    required_categories = ["fact", "pitfall", "pattern", "tool-quirk", "question"]
-    for category in required_categories:
-        if category not in content:
-            return False, f"Missing required category: {category}"
-
-    # Check for required output fields
-    required_fields = ["fact", "category", "repo", "confidence"]
-    for field in required_fields:
-        if field not in content:
-            return False, f"Missing required output field: {field}"
-
-    # Check prompt size (should be ~1k tokens, roughly 4k chars)
-    if len(content) > 5000:
-        return False, f"Prompt too large: {len(content)} chars (max ~5000)"
-
-    if len(content) < 1000:
-        return False, f"Prompt too small: {len(content)} chars (min ~1000)"
-
-    return True, "Prompt structure is valid"
-
-def check_confidence_scoring():
-    """Test that confidence scoring is properly defined."""
-    prompt_path = Path("templates/harvest-prompt.md")
-    content = prompt_path.read_text()
-
-    # Check for confidence scale definitions
-    confidence_levels = [
-        ("0.9-1.0", "explicitly stated"),
-        ("0.7-0.8", "clearly implied"),
-        ("0.5-0.6", "suggested"),
-        ("0.3-0.4", "inferred"),
-        ("0.1-0.2", "speculative")
-    ]
-
-    for level, description in confidence_levels:
-        if level not in content:
-            return False, f"Missing confidence level: {level}"
-        if description.lower() not in content.lower():
-            return False, f"Missing confidence description: {description}"
-
-    return True, "Confidence scoring is properly defined"
-
-def check_example_quality():
-    """Test that examples are clear and complete."""
-    prompt_path = Path("templates/harvest-prompt.md")
-    content = prompt_path.read_text()
-
-    # Check for example input/output
-    if "example" not in content.lower():
-        return False, "No examples provided"
-
-    # Check that example includes all categories
-    example_section = content[content.lower().find("example"):]
-
-    # Look for JSON example
-    json_match = re.search(r'\{[\s\S]*"knowledge"[\s\S]*\}', example_section)
-    if not json_match:
-        return False, "No JSON example found"
-
-    example_json = json_match.group(0)
-
-    # Check for all categories in example
-    for category in ["fact", "pitfall", "pattern", "tool-quirk", "question"]:
-        if category not in example_json:
-            return False, f"Example missing category: {category}"
-
-    return True, "Examples are clear and complete"
-
-def check_constraint_coverage():
-    """Test that constraints cover all requirements."""
-    prompt_path = Path("templates/harvest-prompt.md")
-    content = prompt_path.read_text()
-
-    required_constraints = [
-        "No hallucination",
-        "only extract",
-        "explicitly",
-        "partial",
-        "failed sessions",
-        "1k tokens"
-    ]
-
-    for constraint in required_constraints:
-        if constraint.lower() not in content.lower():
-            return False, f"Missing constraint: {constraint}"
-
-    return True, "Constraints cover all requirements"
-
-def check_test_sessions():
-    """Test that test sessions exist and are valid."""
-    test_sessions_dir = Path("test_sessions")
-    if not test_sessions_dir.exists():
-        return False, "test_sessions directory not found"
-
-    session_files = list(test_sessions_dir.glob("*.jsonl"))
-    if len(session_files) < 5:
-        return False, f"Only {len(session_files)} test sessions found, need 5"
-
-    # Check each session file
-    for session_file in session_files:
-        content = session_file.read_text()
-        lines = content.strip().split("\n")
-
-        # Check that each line is valid JSON
-        for i, line in enumerate(lines, 1):
-            try:
-                json.loads(line)
-            except json.JSONDecodeError as e:
-                return False, f"Invalid JSON in {session_file.name}, line {i}: {e}"
-
-    return True, f"Found {len(session_files)} valid test sessions"
-
-def run_all_tests():
-    """Run all tests and return results."""
-    tests = [
-        ("Prompt Structure", check_prompt_structure),
-        ("Confidence Scoring", check_confidence_scoring),
-        ("Example Quality", check_example_quality),
-        ("Constraint Coverage", check_constraint_coverage),
-        ("Test Sessions", check_test_sessions)
-    ]
-
-    results = []
-    all_passed = True
-
-    for test_name, test_func in tests:
-        try:
-            passed, message = test_func()
-            results.append({
-                "test": test_name,
-                "passed": passed,
-                "message": message
-            })
-            if not passed:
-                all_passed = False
-        except Exception as e:
-            results.append({
-                "test": test_name,
-                "passed": False,
-                "message": f"Error: {str(e)}"
-            })
-            all_passed = False
-
-    # Print results
-    print("=" * 60)
-    print("HARVEST PROMPT TEST RESULTS")
-    print("=" * 60)
-
-    for result in results:
-        status = "✓ PASS" if result["passed"] else "✗ FAIL"
-        print(f"{status}: {result['test']}")
-        print(f"  {result['message']}")
-        print()
-
-    print("=" * 60)
-    if all_passed:
-        print("ALL TESTS PASSED!")
-    else:
-        print("SOME TESTS FAILED!")
-    print("=" * 60)
-
-    return all_passed, results
-
-
-
-# --- Pytest-compatible tests ---
-
-def test_prompt_structure():
-    passed, msg = check_prompt_structure()
-    assert passed, msg
-
-def test_confidence_scoring():
-    passed, msg = check_confidence_scoring()
-    assert passed, msg
-
-def test_example_quality():
-    passed, msg = check_example_quality()
-    assert passed, msg
-
-def test_constraint_coverage():
-    passed, msg = check_constraint_coverage()
-    assert passed, msg
-
-def test_test_sessions():
-    passed, msg = check_test_sessions()
-    assert passed, msg
-
-if __name__ == "__main__":
-    all_passed, results = run_all_tests()
-
-    # Save results to file
-    with open("test_results.json", "w") as f:
-        json.dump({
-            "all_passed": all_passed,
-            "results": results,
-            "timestamp": "2026-04-14T19:05:00Z"
-        }, f, indent=2)
-
-    print(f"Results saved to test_results.json")
-
-    # Exit with appropriate code
-    exit(0 if all_passed else 1)