From 8d716ff03f1c0201343232c62e5f9beaa98631df Mon Sep 17 00:00:00 2001
From: Timmy Time
Date: Tue, 14 Apr 2026 19:02:41 +0000
Subject: [PATCH] Add comprehensive test script for harvest prompt validation

---
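Notes: the script resolves templates/harvest-prompt.md and test_sessions/
relative to the working directory, so it should be run from the repo root,
e.g.:

    python3 scripts/test_harvest_prompt_comprehensive.py

It prints a pass/fail summary for each check, writes test_results.json into
the working directory, and exits non-zero if any check fails.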
"pattern", "tool-quirk", "question"]: + if category not in example_json: + return False, f"Example missing category: {category}" + + return True, "Examples are clear and complete" + +def test_constraint_coverage(): + """Test that constraints cover all requirements.""" + prompt_path = Path("templates/harvest-prompt.md") + content = prompt_path.read_text() + + required_constraints = [ + "No hallucination", + "only extract", + "explicitly", + "partial", + "failed sessions", + "1k tokens" + ] + + for constraint in required_constraints: + if constraint.lower() not in content.lower(): + return False, f"Missing constraint: {constraint}" + + return True, "Constraints cover all requirements" + +def test_test_sessions(): + """Test that test sessions exist and are valid.""" + test_sessions_dir = Path("test_sessions") + if not test_sessions_dir.exists(): + return False, "test_sessions directory not found" + + session_files = list(test_sessions_dir.glob("*.jsonl")) + if len(session_files) < 5: + return False, f"Only {len(session_files)} test sessions found, need 5" + + # Check each session file + for session_file in session_files: + content = session_file.read_text() + lines = content.strip().split("\n") + + # Check that each line is valid JSON + for i, line in enumerate(lines, 1): + try: + json.loads(line) + except json.JSONDecodeError as e: + return False, f"Invalid JSON in {session_file.name}, line {i}: {e}" + + return True, f"Found {len(session_files)} valid test sessions" + +def run_all_tests(): + """Run all tests and return results.""" + tests = [ + ("Prompt Structure", test_prompt_structure), + ("Confidence Scoring", test_confidence_scoring), + ("Example Quality", test_example_quality), + ("Constraint Coverage", test_constraint_coverage), + ("Test Sessions", test_test_sessions) + ] + + results = [] + all_passed = True + + for test_name, test_func in tests: + try: + passed, message = test_func() + results.append({ + "test": test_name, + "passed": passed, + "message": message + }) + if not passed: + all_passed = False + except Exception as e: + results.append({ + "test": test_name, + "passed": False, + "message": f"Error: {str(e)}" + }) + all_passed = False + + # Print results + print("=" * 60) + print("HARVEST PROMPT TEST RESULTS") + print("=" * 60) + + for result in results: + status = "✓ PASS" if result["passed"] else "✗ FAIL" + print(f"{status}: {result['test']}") + print(f" {result['message']}") + print() + + print("=" * 60) + if all_passed: + print("ALL TESTS PASSED!") + else: + print("SOME TESTS FAILED!") + print("=" * 60) + + return all_passed, results + +if __name__ == "__main__": + all_passed, results = run_all_tests() + + # Save results to file + with open("test_results.json", "w") as f: + json.dump({ + "all_passed": all_passed, + "results": results, + "timestamp": "2026-04-14T19:05:00Z" + }, f, indent=2) + + print(f"Results saved to test_results.json") + + # Exit with appropriate code + exit(0 if all_passed else 1)