#!/usr/bin/env python3
"""
Tests for the Self-Grader Module

Run with: python -m pytest test_self_grader.py -v
"""

import json
import sqlite3
import tempfile
from contextlib import closing
from datetime import datetime, timedelta
from pathlib import Path

import pytest

from self_grader import SelfGrader, SessionGrade, WeeklyReport


# ---------------------------------------------------------------------------
# Shared helpers
# ---------------------------------------------------------------------------

def _make_sessions_dir(tmp_path):
    """Create and return the ``sessions`` directory under *tmp_path*."""
    sessions_dir = tmp_path / "sessions"
    sessions_dir.mkdir()
    return sessions_dir


def _make_grader(tmp_path):
    """Return a SelfGrader whose files live under *tmp_path*.

    The grades database path is ``tmp_path / "grades.db"`` and the sessions
    directory is ``tmp_path / "sessions"``. The directory is not created
    here; tests that need it on disk call ``_make_sessions_dir`` first.
    """
    return SelfGrader(
        grades_db_path=tmp_path / "grades.db",
        sessions_dir=tmp_path / "sessions",
    )


def _make_grade(**overrides):
    """Return a fully populated SessionGrade.

    Keyword arguments replace the matching default field values, so each
    test only spells out the fields it actually cares about.
    """
    fields = {
        "session_id": "test-123",
        "session_file": "session_test.json",
        "graded_at": datetime.now().isoformat(),
        "task_completed": True,
        "tool_calls_efficient": 4,
        "response_quality": 5,
        "errors_recovered": True,
        "total_api_calls": 10,
        "model": "claude-opus",
        "platform": "cli",
        "session_start": datetime.now().isoformat(),
        "duration_seconds": 120.0,
        "task_summary": "Test task",
        "total_errors": 0,
        "error_types": "[]",
        "tools_with_errors": "[]",
        "had_repeated_errors": False,
        "had_infinite_loop_risk": False,
        "had_user_clarification": False,
    }
    fields.update(overrides)
    return SessionGrade(**fields)


def _write_session(session_file, session_data):
    """Serialize *session_data* as JSON into *session_file*."""
    # Explicit encoding keeps the fixture identical across platforms/locales.
    with open(session_file, "w", encoding="utf-8") as f:
        json.dump(session_data, f)


# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------

class TestSessionGrade:
    """Tests for SessionGrade dataclass."""

    def test_session_grade_creation(self):
        """Test creating a SessionGrade."""
        grade = _make_grade(
            session_id="test-123",
            task_completed=True,
            tool_calls_efficient=4,
            response_quality=5,
        )

        assert grade.session_id == "test-123"
        assert grade.task_completed is True
        assert grade.tool_calls_efficient == 4
        assert grade.response_quality == 5


class TestSelfGraderInit:
    """Tests for SelfGrader initialization."""

    def test_init_creates_database(self, tmp_path):
        """Test that initialization creates the database."""
        db_path = tmp_path / "grades.db"
        sessions_dir = _make_sessions_dir(tmp_path)

        # Only the constructor's side effect (database creation) is under test.
        SelfGrader(grades_db_path=db_path, sessions_dir=sessions_dir)

        assert db_path.exists()

        # Check tables exist. sqlite3's own context manager only manages the
        # transaction, so closing() is what actually releases the connection.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            )
            tables = {row[0] for row in cursor.fetchall()}

        assert "session_grades" in tables
        assert "weekly_reports" in tables


class TestErrorDetection:
    """Tests for error detection and classification."""

    def test_detect_exit_code_error(self, tmp_path):
        """Test detection of exit code errors."""
        grader = _make_grader(tmp_path)

        assert grader._detect_error('{"exit_code": 1, "output": ""}') is True
        assert grader._detect_error('{"exit_code": 0, "output": "success"}') is False
        assert grader._detect_error('') is False

    def test_detect_explicit_error(self, tmp_path):
        """Test detection of explicit error messages."""
        grader = _make_grader(tmp_path)

        assert grader._detect_error('{"error": "file not found"}') is True
        assert grader._detect_error('Traceback (most recent call last):') is True
        assert grader._detect_error('Command failed with exception') is True

    def test_classify_file_not_found(self, tmp_path):
        """Test classification of file not found errors."""
        grader = _make_grader(tmp_path)

        error = "Error: file '/path/to/file' not found"
        assert grader._classify_error(error) == "file_not_found"

    def test_classify_timeout(self, tmp_path):
        """Test classification of timeout errors."""
        grader = _make_grader(tmp_path)

        error = "Request timed out after 30 seconds"
        assert grader._classify_error(error) == "timeout"

    def test_classify_unknown(self, tmp_path):
        """Test classification of unknown errors."""
        grader = _make_grader(tmp_path)

        error = "Something weird happened"
        assert grader._classify_error(error) == "unknown"


class TestSessionAnalysis:
    """Tests for session analysis."""

    def test_analyze_empty_messages(self, tmp_path):
        """Test analysis of empty message list."""
        grader = _make_grader(tmp_path)

        analysis = grader._analyze_messages([])

        assert analysis["total_api_calls"] == 0
        assert analysis["total_errors"] == 0
        assert analysis["had_repeated_errors"] is False

    def test_analyze_simple_session(self, tmp_path):
        """Test analysis of a simple successful session."""
        grader = _make_grader(tmp_path)

        messages = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]

        analysis = grader._analyze_messages(messages)

        assert analysis["total_api_calls"] == 1
        assert analysis["total_errors"] == 0

    def test_analyze_session_with_errors(self, tmp_path):
        """Test analysis of a session with errors."""
        grader = _make_grader(tmp_path)

        # One failing terminal call followed by a successful retry.
        messages = [
            {"role": "user", "content": "Run command"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{"function": {"name": "terminal"}}],
            },
            {
                "role": "tool",
                "name": "terminal",
                "content": '{"exit_code": 1, "error": "failed"}',
            },
            {
                "role": "assistant",
                "content": "Let me try again",
                "tool_calls": [{"function": {"name": "terminal"}}],
            },
            {
                "role": "tool",
                "name": "terminal",
                "content": '{"exit_code": 0, "output": "success"}',
            },
        ]

        analysis = grader._analyze_messages(messages)

        assert analysis["total_api_calls"] == 2
        assert analysis["total_errors"] == 1
        assert analysis["tools_with_errors"] == {"terminal"}

    def test_detect_repeated_errors(self, tmp_path):
        """Test detection of repeated errors pattern."""
        grader = _make_grader(tmp_path)

        # Five consecutive failing terminal calls. Fresh dicts are built on
        # every iteration so no message object is shared between entries.
        messages = []
        for _ in range(5):
            messages.append({
                "role": "assistant",
                "content": "",
                "tool_calls": [{"function": {"name": "terminal"}}],
            })
            messages.append({
                "role": "tool",
                "name": "terminal",
                "content": '{"exit_code": 1, "error": "failed"}',
            })

        analysis = grader._analyze_messages(messages)

        assert analysis["had_repeated_errors"] is True
        assert analysis["had_infinite_loop_risk"] is True


class TestGradingLogic:
    """Tests for grading logic."""

    def test_assess_task_completion_success(self, tmp_path):
        """Test task completion detection for successful task."""
        grader = _make_grader(tmp_path)

        messages = [
            {"role": "user", "content": "Create a file"},
            {"role": "assistant", "content": "Done! Created the file successfully."},
        ]

        analysis = grader._analyze_messages(messages)
        result = grader._assess_task_completion(messages, analysis)

        assert result is True

    def test_assess_tool_efficiency_perfect(self, tmp_path):
        """Test perfect tool efficiency score."""
        grader = _make_grader(tmp_path)

        analysis = {
            "total_api_calls": 5,
            "total_errors": 0,
        }

        score = grader._assess_tool_efficiency(analysis)

        assert score == 5

    def test_assess_tool_efficiency_poor(self, tmp_path):
        """Test poor tool efficiency score."""
        grader = _make_grader(tmp_path)

        analysis = {
            "total_api_calls": 10,
            "total_errors": 5,
        }

        score = grader._assess_tool_efficiency(analysis)

        assert score <= 2

    def test_assess_response_quality_high(self, tmp_path):
        """Test high response quality with good content."""
        grader = _make_grader(tmp_path)

        # A long final answer that contains a fenced code block.
        messages = [
            {
                "role": "assistant",
                "content": "Here's the solution:\n```python\nprint('hello')\n```\n"
                + "x" * 1000,
            }
        ]

        analysis = {
            "final_assistant_msg": messages[0],
            "total_errors": 0,
            "had_repeated_errors": False,
            "had_infinite_loop_risk": False,
        }

        score = grader._assess_response_quality(messages, analysis)

        assert score >= 4

    def test_error_recovery_success(self, tmp_path):
        """Test error recovery assessment - recovered."""
        grader = _make_grader(tmp_path)

        analysis = {
            "total_errors": 1,
            "had_repeated_errors": False,
        }

        messages = [
            {"role": "assistant", "content": "Success after retry!"}
        ]

        result = grader._assess_error_recovery(messages, analysis)

        assert result is True


class TestSessionGrading:
    """Tests for full session grading."""

    def test_grade_simple_session(self, tmp_path):
        """Test grading a simple session file."""
        sessions_dir = _make_sessions_dir(tmp_path)

        # Create a test session file
        session_data = {
            "session_id": "test-session-1",
            "model": "test-model",
            "platform": "cli",
            "session_start": datetime.now().isoformat(),
            "message_count": 2,
            "messages": [
                {"role": "user", "content": "Hello, create a test file"},
                {"role": "assistant", "content": "Done! Created test.txt successfully."},
            ],
        }

        session_file = sessions_dir / "session_test-session-1.json"
        _write_session(session_file, session_data)

        grader = _make_grader(tmp_path)

        grade = grader.grade_session_file(session_file)

        assert grade is not None
        assert grade.session_id == "test-session-1"
        assert grade.task_completed is True
        assert grade.total_api_calls == 1

    def test_save_and_retrieve_grade(self, tmp_path):
        """Test saving and retrieving a grade."""
        _make_sessions_dir(tmp_path)
        grader = _make_grader(tmp_path)

        grade = _make_grade(
            session_id="test-save",
            session_file="test.json",
            model="test-model",
            duration_seconds=60.0,
            task_summary="Test",
        )

        result = grader.save_grade(grade)
        assert result is True

        # Verify in database; closing() releases the connection, which the
        # sqlite3 connection context manager alone does not do.
        with closing(sqlite3.connect(tmp_path / "grades.db")) as conn:
            cursor = conn.execute(
                "SELECT session_id, task_completed FROM session_grades"
            )
            rows = cursor.fetchall()

        assert len(rows) == 1
        assert rows[0][0] == "test-save"
        assert rows[0][1] == 1  # SQLite returns the stored boolean as integer 1


class TestPatternIdentification:
    """Tests for pattern identification."""

    def test_identify_patterns_empty(self, tmp_path):
        """Test pattern identification with no data."""
        _make_sessions_dir(tmp_path)
        grader = _make_grader(tmp_path)

        patterns = grader.identify_patterns(days=7)

        assert patterns["total_sessions"] == 0
        assert patterns["avg_tool_efficiency"] == 0

    def test_infer_task_type(self, tmp_path):
        """Test task type inference."""
        grader = _make_grader(tmp_path)

        assert grader._infer_task_type("Please review this code") == "code_review"
        assert grader._infer_task_type("Fix the bug in login") == "debugging"
        assert grader._infer_task_type("Add a new feature") == "feature_impl"
        assert grader._infer_task_type("Do something random") == "general"


class TestWeeklyReport:
    """Tests for weekly report generation."""

    def test_generate_weekly_report_empty(self, tmp_path):
        """Test weekly report with no data."""
        _make_sessions_dir(tmp_path)
        grader = _make_grader(tmp_path)

        report = grader.generate_weekly_report()

        assert report.total_sessions == 0
        assert report.avg_tool_efficiency == 0
        assert len(report.improvement_suggestions) > 0

    def test_generate_suggestions(self, tmp_path):
        """Test suggestion generation."""
        grader = _make_grader(tmp_path)

        patterns = {
            "completion_rate": 50,
            "avg_tool_efficiency": 2,
            "error_recovery_rate": 70,
        }

        suggestions = grader._generate_suggestions(
            patterns,
            [("code_review", 2.0)],
            [("terminal", 5)],
            [("file_not_found", 3)],
        )

        assert len(suggestions) > 0
        assert any("completion rate" in s.lower() for s in suggestions)


class TestGradeLatestSessions:
    """Tests for grading latest sessions."""

    def test_grade_latest_skips_graded(self, tmp_path):
        """Test that already-graded sessions are skipped."""
        sessions_dir = _make_sessions_dir(tmp_path)

        # Create session file
        session_data = {
            "session_id": "already-graded",
            "model": "test",
            "messages": [
                {"role": "user", "content": "Test"},
                {"role": "assistant", "content": "Done"},
            ],
        }

        session_file = sessions_dir / "session_already-graded.json"
        _write_session(session_file, session_data)

        grader = _make_grader(tmp_path)

        # First grading
        grades1 = grader.grade_latest_sessions(n=10)
        assert len(grades1) == 1

        # Second grading should skip
        grades2 = grader.grade_latest_sessions(n=10)
        assert len(grades2) == 0


def test_main_cli():
    """Test CLI main function exists."""
    from self_grader import main

    assert callable(main)


if __name__ == '__main__':
    # Propagate pytest's exit status so a failing run is visible to the shell.
    raise SystemExit(pytest.main([__file__, '-v']))