# uniwizard/test_self_grader.py

#!/usr/bin/env python3
"""
Tests for the Self-Grader Module

Run with: python -m pytest test_self_grader.py -v
"""

import json
import sqlite3
import tempfile
from pathlib import Path
from datetime import datetime, timedelta
import pytest

from self_grader import SelfGrader, SessionGrade, WeeklyReport


class TestSessionGrade:
    """Checks that a SessionGrade can be constructed and read back."""

    def test_session_grade_creation(self):
        """Build a fully populated SessionGrade and verify a few attributes."""
        # Keyword arguments are collected first so the construction call
        # itself stays on a single line.
        field_values = dict(
            session_id="test-123",
            session_file="session_test.json",
            graded_at=datetime.now().isoformat(),
            task_completed=True,
            tool_calls_efficient=4,
            response_quality=5,
            errors_recovered=True,
            total_api_calls=10,
            model="claude-opus",
            platform="cli",
            session_start=datetime.now().isoformat(),
            duration_seconds=120.0,
            task_summary="Test task",
            total_errors=0,
            error_types="[]",
            tools_with_errors="[]",
            had_repeated_errors=False,
            had_infinite_loop_risk=False,
            had_user_clarification=False,
        )
        grade = SessionGrade(**field_values)

        assert grade.session_id == "test-123"
        assert grade.task_completed is True
        # The two numeric scores are compared together.
        assert grade.tool_calls_efficient == 4 and grade.response_quality == 5


class TestSelfGraderInit:
    """Tests for SelfGrader initialization."""

    def test_init_creates_database(self, tmp_path):
        """Check that constructing a SelfGrader creates the database and tables.

        Args:
            tmp_path: pytest's per-test temporary directory fixture.
        """
        db_path = tmp_path / "grades.db"
        sessions_dir = tmp_path / "sessions"
        sessions_dir.mkdir()

        # Constructed purely for its side effect; the instance is not inspected.
        SelfGrader(grades_db_path=db_path, sessions_dir=sessions_dir)

        assert db_path.exists()

        # sqlite3's connection context manager only commits or rolls back; it
        # does not close the handle. Close it explicitly so the test leaves no
        # open connection on the temporary database file.
        conn = sqlite3.connect(db_path)
        try:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        finally:
            conn.close()

        tables = {row[0] for row in rows}
        assert "session_grades" in tables
        assert "weekly_reports" in tables


class TestErrorDetection:
    """Covers the grader's error detection and error classification helpers."""

    @staticmethod
    def _new_grader(base_dir):
        """Return a SelfGrader whose database and sessions paths sit under base_dir."""
        return SelfGrader(
            grades_db_path=base_dir / "grades.db",
            sessions_dir=base_dir / "sessions"
        )

    def test_detect_exit_code_error(self, tmp_path):
        """Exit-code payloads: non-zero is an error; zero and empty are not."""
        grader = self._new_grader(tmp_path)

        expectations = (
            ('{"exit_code": 1, "output": ""}', True),
            ('{"exit_code": 0, "output": "success"}', False),
            ('', False),
        )
        for tool_output, is_error in expectations:
            assert grader._detect_error(tool_output) is is_error

    def test_detect_explicit_error(self, tmp_path):
        """Explicit error payloads, tracebacks and failure text are all flagged."""
        grader = self._new_grader(tmp_path)

        error_outputs = (
            '{"error": "file not found"}',
            'Traceback (most recent call last):',
            'Command failed with exception',
        )
        for tool_output in error_outputs:
            assert grader._detect_error(tool_output) is True

    def test_classify_file_not_found(self, tmp_path):
        """A missing-file message is classified as ``file_not_found``."""
        grader = self._new_grader(tmp_path)

        category = grader._classify_error("Error: file '/path/to/file' not found")
        assert category == "file_not_found"

    def test_classify_timeout(self, tmp_path):
        """A timed-out request message is classified as ``timeout``."""
        grader = self._new_grader(tmp_path)

        category = grader._classify_error("Request timed out after 30 seconds")
        assert category == "timeout"

    def test_classify_unknown(self, tmp_path):
        """An unrecognised message is classified as ``unknown``."""
        grader = self._new_grader(tmp_path)

        category = grader._classify_error("Something weird happened")
        assert category == "unknown"


class TestSessionAnalysis:
    """Exercises SelfGrader._analyze_messages on small hand-built transcripts."""

    @staticmethod
    def _new_grader(base_dir):
        """Return a SelfGrader whose database and sessions paths sit under base_dir."""
        return SelfGrader(
            grades_db_path=base_dir / "grades.db",
            sessions_dir=base_dir / "sessions"
        )

    def test_analyze_empty_messages(self, tmp_path):
        """An empty transcript yields zero calls, zero errors, no repeats."""
        summary = self._new_grader(tmp_path)._analyze_messages([])

        assert summary['total_api_calls'] == 0
        assert summary['total_errors'] == 0
        assert summary['had_repeated_errors'] is False

    def test_analyze_simple_session(self, tmp_path):
        """One user turn plus one assistant reply counts as one error-free call."""
        transcript = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": "Hi there!"},
        ]

        summary = self._new_grader(tmp_path)._analyze_messages(transcript)

        assert summary['total_api_calls'] == 1
        assert summary['total_errors'] == 0

    def test_analyze_session_with_errors(self, tmp_path):
        """A failed terminal call followed by a successful retry counts one error."""
        transcript = [
            {"role": "user", "content": "Run command"},
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{"function": {"name": "terminal"}}],
            },
            {
                "role": "tool",
                "name": "terminal",
                "content": '{"exit_code": 1, "error": "failed"}',
            },
            {
                "role": "assistant",
                "content": "Let me try again",
                "tool_calls": [{"function": {"name": "terminal"}}],
            },
            {
                "role": "tool",
                "name": "terminal",
                "content": '{"exit_code": 0, "output": "success"}',
            },
        ]

        summary = self._new_grader(tmp_path)._analyze_messages(transcript)

        assert summary['total_api_calls'] == 2
        assert summary['total_errors'] == 1
        assert summary['tools_with_errors'] == {"terminal"}

    def test_detect_repeated_errors(self, tmp_path):
        """Five consecutive failing terminal calls set both risk flags."""
        transcript = []
        for _ in range(5):
            # Fresh dicts on every pass so no message object is shared.
            transcript.extend([
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [{"function": {"name": "terminal"}}],
                },
                {
                    "role": "tool",
                    "name": "terminal",
                    "content": '{"exit_code": 1, "error": "failed"}',
                },
            ])

        summary = self._new_grader(tmp_path)._analyze_messages(transcript)

        assert summary['had_repeated_errors'] is True
        assert summary['had_infinite_loop_risk'] is True


class TestGradingLogic:
    """Exercises the individual scoring helpers on SelfGrader."""

    @staticmethod
    def _new_grader(base_dir):
        """Return a SelfGrader whose database and sessions paths sit under base_dir."""
        return SelfGrader(
            grades_db_path=base_dir / "grades.db",
            sessions_dir=base_dir / "sessions"
        )

    def test_assess_task_completion_success(self, tmp_path):
        """A session ending in a success message is assessed as completed."""
        grader = self._new_grader(tmp_path)
        transcript = [
            {"role": "user", "content": "Create a file"},
            {"role": "assistant", "content": "Done! Created the file successfully."},
        ]

        summary = grader._analyze_messages(transcript)
        completed = grader._assess_task_completion(transcript, summary)

        assert completed is True

    def test_assess_tool_efficiency_perfect(self, tmp_path):
        """Five calls with no errors receive the score 5."""
        grader = self._new_grader(tmp_path)

        score = grader._assess_tool_efficiency({'total_api_calls': 5, 'total_errors': 0})

        assert score == 5

    def test_assess_tool_efficiency_poor(self, tmp_path):
        """Ten calls with five errors score 2 or lower."""
        grader = self._new_grader(tmp_path)

        score = grader._assess_tool_efficiency({'total_api_calls': 10, 'total_errors': 5})

        assert score <= 2

    def test_assess_response_quality_high(self, tmp_path):
        """A long reply containing a code block and no errors scores at least 4."""
        grader = self._new_grader(tmp_path)

        final_reply = {
            "role": "assistant",
            "content": "Here's the solution:\n```python\nprint('hello')\n```\n" + "x" * 1000,
        }
        transcript = [final_reply]
        summary = {
            # Same object as the transcript's only message.
            'final_assistant_msg': final_reply,
            'total_errors': 0,
            'had_repeated_errors': False,
            'had_infinite_loop_risk': False,
        }

        score = grader._assess_response_quality(transcript, summary)

        assert score >= 4

    def test_error_recovery_success(self, tmp_path):
        """One non-repeated error followed by a success message counts as recovered."""
        grader = self._new_grader(tmp_path)

        transcript = [{"role": "assistant", "content": "Success after retry!"}]
        summary = {'total_errors': 1, 'had_repeated_errors': False}

        recovered = grader._assess_error_recovery(transcript, summary)

        assert recovered is True


class TestSessionGrading:
    """Tests for full session grading."""

    def test_grade_simple_session(self, tmp_path):
        """Grade a minimal two-message session file and check the result.

        Args:
            tmp_path: pytest's per-test temporary directory fixture.
        """
        sessions_dir = tmp_path / "sessions"
        sessions_dir.mkdir()

        # Create a test session file
        session_data = {
            "session_id": "test-session-1",
            "model": "test-model",
            "platform": "cli",
            "session_start": datetime.now().isoformat(),
            "message_count": 2,
            "messages": [
                {"role": "user", "content": "Hello, create a test file"},
                {"role": "assistant", "content": "Done! Created test.txt successfully."},
            ],
        }

        session_file = sessions_dir / "session_test-session-1.json"
        # Explicit encoding keeps the fixture independent of the platform's
        # locale default.
        session_file.write_text(json.dumps(session_data), encoding="utf-8")

        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=sessions_dir
        )

        grade = grader.grade_session_file(session_file)

        assert grade is not None
        assert grade.session_id == "test-session-1"
        assert grade.task_completed is True
        assert grade.total_api_calls == 1

    def test_save_and_retrieve_grade(self, tmp_path):
        """Save a grade and read it back directly from the SQLite file.

        Args:
            tmp_path: pytest's per-test temporary directory fixture.
        """
        sessions_dir = tmp_path / "sessions"
        sessions_dir.mkdir()
        db_path = tmp_path / "grades.db"

        grader = SelfGrader(
            grades_db_path=db_path,
            sessions_dir=sessions_dir
        )

        grade = SessionGrade(
            session_id="test-save",
            session_file="test.json",
            graded_at=datetime.now().isoformat(),
            task_completed=True,
            tool_calls_efficient=4,
            response_quality=5,
            errors_recovered=True,
            total_api_calls=10,
            model="test-model",
            platform="cli",
            session_start=datetime.now().isoformat(),
            duration_seconds=60.0,
            task_summary="Test",
            total_errors=0,
            error_types="[]",
            tools_with_errors="[]",
            had_repeated_errors=False,
            had_infinite_loop_risk=False,
            had_user_clarification=False
        )

        result = grader.save_grade(grade)
        assert result is True

        # Verify in database. sqlite3's connection context manager does not
        # close the connection, so it is closed explicitly here.
        conn = sqlite3.connect(db_path)
        try:
            rows = conn.execute(
                "SELECT session_id, task_completed FROM session_grades"
            ).fetchall()
        finally:
            conn.close()

        assert len(rows) == 1
        assert rows[0][0] == "test-save"
        # The stored task_completed value reads back as integer 1.
        assert rows[0][1] == 1


class TestPatternIdentification:
    """Covers pattern aggregation and task-type inference."""

    def test_identify_patterns_empty(self, tmp_path):
        """With no stored grades, the session count and average efficiency are 0."""
        session_root = tmp_path / "sessions"
        session_root.mkdir()
        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=session_root,
        )

        found = grader.identify_patterns(days=7)

        assert found['total_sessions'] == 0
        assert found['avg_tool_efficiency'] == 0

    def test_infer_task_type(self, tmp_path):
        """Each sample prompt maps to its expected task-type label."""
        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=tmp_path / "sessions",
        )

        expected_labels = (
            ("Please review this code", "code_review"),
            ("Fix the bug in login", "debugging"),
            ("Add a new feature", "feature_impl"),
            ("Do something random", "general"),
        )
        for prompt, label in expected_labels:
            assert grader._infer_task_type(prompt) == label


class TestWeeklyReport:
    """Covers weekly report generation and suggestion building."""

    def test_generate_weekly_report_empty(self, tmp_path):
        """An empty database still produces a report with at least one suggestion."""
        session_root = tmp_path / "sessions"
        session_root.mkdir()
        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=session_root,
        )

        weekly = grader.generate_weekly_report()

        assert weekly.total_sessions == 0
        assert weekly.avg_tool_efficiency == 0
        assert len(weekly.improvement_suggestions) > 0

    def test_generate_suggestions(self, tmp_path):
        """These sample patterns yield a suggestion mentioning completion rate."""
        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=tmp_path / "sessions",
        )
        sample_patterns = {
            'completion_rate': 50,
            'avg_tool_efficiency': 2,
            'error_recovery_rate': 70,
        }

        tips = grader._generate_suggestions(
            sample_patterns,
            [('code_review', 2.0)],
            [('terminal', 5)],
            [('file_not_found', 3)],
        )

        assert len(tips) > 0
        # Case-insensitive search across the returned suggestions.
        assert any('completion rate' in tip.lower() for tip in tips)


class TestGradeLatestSessions:
    """Covers batch grading of the most recent session files."""

    def test_grade_latest_skips_graded(self, tmp_path):
        """A session graded on the first pass is not returned on the second."""
        session_root = tmp_path / "sessions"
        session_root.mkdir()

        # Write a single minimal session file to disk.
        payload = {
            "session_id": "already-graded",
            "model": "test",
            "messages": [
                {"role": "user", "content": "Test"},
                {"role": "assistant", "content": "Done"},
            ],
        }
        target = session_root / "session_already-graded.json"
        target.write_text(json.dumps(payload))

        grader = SelfGrader(
            grades_db_path=tmp_path / "grades.db",
            sessions_dir=session_root,
        )

        # The first pass grades the one session.
        first_pass = grader.grade_latest_sessions(n=10)
        assert len(first_pass) == 1

        # The second pass returns nothing.
        second_pass = grader.grade_latest_sessions(n=10)
        assert len(second_pass) == 0


def test_main_cli():
    """Verify that ``self_grader`` exposes a callable ``main``."""
    # Imported inside the test, as in the original.
    from self_grader import main as cli_entry_point

    is_callable = callable(cli_entry_point)
    assert is_callable


if __name__ == '__main__':
    # Allow running this file directly. pytest.main returns an exit code, so
    # raise it through SystemExit; otherwise a direct run would exit 0 even
    # when tests fail.
    raise SystemExit(pytest.main([__file__, '-v']))
[KimiClaw] Uniwizard routing modules — quality scorer, task classifier, self-grader (#107) Co-authored-by: Kimi Claw <kimi@timmytime.ai> Co-committed-by: Kimi Claw <kimi@timmytime.ai> 2026-03-30 20:15:36 +00:00			`#!/usr/bin/env python3`
			`"""`
			`Tests for the Self-Grader Module`

			`Run with: python -m pytest test_self_grader.py -v`
			`"""`

			`import json`
			`import sqlite3`
			`import tempfile`
			`from pathlib import Path`
			`from datetime import datetime, timedelta`
			`import pytest`

			`from self_grader import SelfGrader, SessionGrade, WeeklyReport`


			`class TestSessionGrade:`
			`"""Tests for SessionGrade dataclass."""`

			`def test_session_grade_creation(self):`
			`"""Test creating a SessionGrade."""`
			`grade = SessionGrade(`
			`session_id="test-123",`
			`session_file="session_test.json",`
			`graded_at=datetime.now().isoformat(),`
			`task_completed=True,`
			`tool_calls_efficient=4,`
			`response_quality=5,`
			`errors_recovered=True,`
			`total_api_calls=10,`
			`model="claude-opus",`
			`platform="cli",`
			`session_start=datetime.now().isoformat(),`
			`duration_seconds=120.0,`
			`task_summary="Test task",`
			`total_errors=0,`
			`error_types="[]",`
			`tools_with_errors="[]",`
			`had_repeated_errors=False,`
			`had_infinite_loop_risk=False,`
			`had_user_clarification=False`
			`)`

			`assert grade.session_id == "test-123"`
			`assert grade.task_completed is True`
			`assert grade.tool_calls_efficient == 4`
			`assert grade.response_quality == 5`


			`class TestSelfGraderInit:`
			`"""Tests for SelfGrader initialization."""`

			`def test_init_creates_database(self, tmp_path):`
			`"""Test that initialization creates the database."""`
			`db_path = tmp_path / "grades.db"`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`grader = SelfGrader(grades_db_path=db_path, sessions_dir=sessions_dir)`

			`assert db_path.exists()`

			`# Check tables exist`
			`with sqlite3.connect(db_path) as conn:`
			`cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")`
			`tables = {row[0] for row in cursor.fetchall()}`

			`assert "session_grades" in tables`
			`assert "weekly_reports" in tables`


			`class TestErrorDetection:`
			`"""Tests for error detection and classification."""`

			`def test_detect_exit_code_error(self, tmp_path):`
			`"""Test detection of exit code errors."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`assert grader._detect_error('{"exit_code": 1, "output": ""}') is True`
			`assert grader._detect_error('{"exit_code": 0, "output": "success"}') is False`
			`assert grader._detect_error('') is False`

			`def test_detect_explicit_error(self, tmp_path):`
			`"""Test detection of explicit error messages."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`assert grader._detect_error('{"error": "file not found"}') is True`
			`assert grader._detect_error('Traceback (most recent call last):') is True`
			`assert grader._detect_error('Command failed with exception') is True`

			`def test_classify_file_not_found(self, tmp_path):`
			`"""Test classification of file not found errors."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`error = "Error: file '/path/to/file' not found"`
			`assert grader._classify_error(error) == "file_not_found"`

			`def test_classify_timeout(self, tmp_path):`
			`"""Test classification of timeout errors."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`error = "Request timed out after 30 seconds"`
			`assert grader._classify_error(error) == "timeout"`

			`def test_classify_unknown(self, tmp_path):`
			`"""Test classification of unknown errors."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`error = "Something weird happened"`
			`assert grader._classify_error(error) == "unknown"`


			`class TestSessionAnalysis:`
			`"""Tests for session analysis."""`

			`def test_analyze_empty_messages(self, tmp_path):`
			`"""Test analysis of empty message list."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`analysis = grader._analyze_messages([])`

			`assert analysis['total_api_calls'] == 0`
			`assert analysis['total_errors'] == 0`
			`assert analysis['had_repeated_errors'] is False`

			`def test_analyze_simple_session(self, tmp_path):`
			`"""Test analysis of a simple successful session."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`messages = [`
			`{"role": "user", "content": "Hello"},`
			`{"role": "assistant", "content": "Hi there!"},`
			`]`

			`analysis = grader._analyze_messages(messages)`

			`assert analysis['total_api_calls'] == 1`
			`assert analysis['total_errors'] == 0`

			`def test_analyze_session_with_errors(self, tmp_path):`
			`"""Test analysis of a session with errors."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`messages = [`
			`{"role": "user", "content": "Run command"},`
			`{"role": "assistant", "content": "", "tool_calls": [`
			`{"function": {"name": "terminal"}}`
			`]},`
			`{"role": "tool", "name": "terminal", "content": '{"exit_code": 1, "error": "failed"}'},`
			`{"role": "assistant", "content": "Let me try again", "tool_calls": [`
			`{"function": {"name": "terminal"}}`
			`]},`
			`{"role": "tool", "name": "terminal", "content": '{"exit_code": 0, "output": "success"}'},`
			`]`

			`analysis = grader._analyze_messages(messages)`

			`assert analysis['total_api_calls'] == 2`
			`assert analysis['total_errors'] == 1`
			`assert analysis['tools_with_errors'] == {"terminal"}`

			`def test_detect_repeated_errors(self, tmp_path):`
			`"""Test detection of repeated errors pattern."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`messages = []`
			`for i in range(5):`
			`messages.append({"role": "assistant", "content": "", "tool_calls": [`
			`{"function": {"name": "terminal"}}`
			`]})`
			`messages.append({"role": "tool", "name": "terminal",`
			`"content": '{"exit_code": 1, "error": "failed"}'})`

			`analysis = grader._analyze_messages(messages)`

			`assert analysis['had_repeated_errors'] is True`
			`assert analysis['had_infinite_loop_risk'] is True`


			`class TestGradingLogic:`
			`"""Tests for grading logic."""`

			`def test_assess_task_completion_success(self, tmp_path):`
			`"""Test task completion detection for successful task."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`messages = [`
			`{"role": "user", "content": "Create a file"},`
			`{"role": "assistant", "content": "Done! Created the file successfully."},`
			`]`

			`analysis = grader._analyze_messages(messages)`
			`result = grader._assess_task_completion(messages, analysis)`

			`assert result is True`

			`def test_assess_tool_efficiency_perfect(self, tmp_path):`
			`"""Test perfect tool efficiency score."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`analysis = {`
			`'total_api_calls': 5,`
			`'total_errors': 0`
			`}`

			`score = grader._assess_tool_efficiency(analysis)`
			`assert score == 5`

			`def test_assess_tool_efficiency_poor(self, tmp_path):`
			`"""Test poor tool efficiency score."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`analysis = {`
			`'total_api_calls': 10,`
			`'total_errors': 5`
			`}`

			`score = grader._assess_tool_efficiency(analysis)`
			`assert score <= 2`

			`def test_assess_response_quality_high(self, tmp_path):`
			`"""Test high response quality with good content."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`messages = [`
			{"role": "assistant", "content": "Here's the solution:\n```python\nprint('hello')\n```\n" + "x" * 1000}
			`]`

			`analysis = {`
			`'final_assistant_msg': messages[0],`
			`'total_errors': 0,`
			`'had_repeated_errors': False,`
			`'had_infinite_loop_risk': False`
			`}`

			`score = grader._assess_response_quality(messages, analysis)`
			`assert score >= 4`

			`def test_error_recovery_success(self, tmp_path):`
			`"""Test error recovery assessment - recovered."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`analysis = {`
			`'total_errors': 1,`
			`'had_repeated_errors': False`
			`}`

			`messages = [`
			`{"role": "assistant", "content": "Success after retry!"}`
			`]`

			`result = grader._assess_error_recovery(messages, analysis)`
			`assert result is True`


			`class TestSessionGrading:`
			`"""Tests for full session grading."""`

			`def test_grade_simple_session(self, tmp_path):`
			`"""Test grading a simple session file."""`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`# Create a test session file`
			`session_data = {`
			`"session_id": "test-session-1",`
			`"model": "test-model",`
			`"platform": "cli",`
			`"session_start": datetime.now().isoformat(),`
			`"message_count": 2,`
			`"messages": [`
			`{"role": "user", "content": "Hello, create a test file"},`
			`{"role": "assistant", "content": "Done! Created test.txt successfully."}`
			`]`
			`}`

			`session_file = sessions_dir / "session_test-session-1.json"`
			`with open(session_file, 'w') as f:`
			`json.dump(session_data, f)`

			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=sessions_dir`
			`)`

			`grade = grader.grade_session_file(session_file)`

			`assert grade is not None`
			`assert grade.session_id == "test-session-1"`
			`assert grade.task_completed is True`
			`assert grade.total_api_calls == 1`

			`def test_save_and_retrieve_grade(self, tmp_path):`
			`"""Test saving and retrieving a grade."""`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=sessions_dir`
			`)`

			`grade = SessionGrade(`
			`session_id="test-save",`
			`session_file="test.json",`
			`graded_at=datetime.now().isoformat(),`
			`task_completed=True,`
			`tool_calls_efficient=4,`
			`response_quality=5,`
			`errors_recovered=True,`
			`total_api_calls=10,`
			`model="test-model",`
			`platform="cli",`
			`session_start=datetime.now().isoformat(),`
			`duration_seconds=60.0,`
			`task_summary="Test",`
			`total_errors=0,`
			`error_types="[]",`
			`tools_with_errors="[]",`
			`had_repeated_errors=False,`
			`had_infinite_loop_risk=False,`
			`had_user_clarification=False`
			`)`

			`result = grader.save_grade(grade)`
			`assert result is True`

			`# Verify in database`
			`with sqlite3.connect(tmp_path / "grades.db") as conn:`
			`cursor = conn.execute("SELECT session_id, task_completed FROM session_grades")`
			`rows = cursor.fetchall()`

			`assert len(rows) == 1`
			`assert rows[0][0] == "test-save"`
			`assert rows[0][1] == 1`


			`class TestPatternIdentification:`
			`"""Tests for pattern identification."""`

			`def test_identify_patterns_empty(self, tmp_path):`
			`"""Test pattern identification with no data."""`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=sessions_dir`
			`)`

			`patterns = grader.identify_patterns(days=7)`

			`assert patterns['total_sessions'] == 0`
			`assert patterns['avg_tool_efficiency'] == 0`

			`def test_infer_task_type(self, tmp_path):`
			`"""Test task type inference."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`assert grader._infer_task_type("Please review this code") == "code_review"`
			`assert grader._infer_task_type("Fix the bug in login") == "debugging"`
			`assert grader._infer_task_type("Add a new feature") == "feature_impl"`
			`assert grader._infer_task_type("Do something random") == "general"`


			`class TestWeeklyReport:`
			`"""Tests for weekly report generation."""`

			`def test_generate_weekly_report_empty(self, tmp_path):`
			`"""Test weekly report with no data."""`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=sessions_dir`
			`)`

			`report = grader.generate_weekly_report()`

			`assert report.total_sessions == 0`
			`assert report.avg_tool_efficiency == 0`
			`assert len(report.improvement_suggestions) > 0`

			`def test_generate_suggestions(self, tmp_path):`
			`"""Test suggestion generation."""`
			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=tmp_path / "sessions"`
			`)`

			`patterns = {`
			`'completion_rate': 50,`
			`'avg_tool_efficiency': 2,`
			`'error_recovery_rate': 70`
			`}`

			`suggestions = grader._generate_suggestions(`
			`patterns,`
			`[('code_review', 2.0)],`
			`[('terminal', 5)],`
			`[('file_not_found', 3)]`
			`)`

			`assert len(suggestions) > 0`
			`assert any('completion rate' in s.lower() for s in suggestions)`


			`class TestGradeLatestSessions:`
			`"""Tests for grading latest sessions."""`

			`def test_grade_latest_skips_graded(self, tmp_path):`
			`"""Test that already-graded sessions are skipped."""`
			`sessions_dir = tmp_path / "sessions"`
			`sessions_dir.mkdir()`

			`# Create session file`
			`session_data = {`
			`"session_id": "already-graded",`
			`"model": "test",`
			`"messages": [`
			`{"role": "user", "content": "Test"},`
			`{"role": "assistant", "content": "Done"}`
			`]`
			`}`

			`session_file = sessions_dir / "session_already-graded.json"`
			`with open(session_file, 'w') as f:`
			`json.dump(session_data, f)`

			`grader = SelfGrader(`
			`grades_db_path=tmp_path / "grades.db",`
			`sessions_dir=sessions_dir`
			`)`

			`# First grading`
			`grades1 = grader.grade_latest_sessions(n=10)`
			`assert len(grades1) == 1`

			`# Second grading should skip`
			`grades2 = grader.grade_latest_sessions(n=10)`
			`assert len(grades2) == 0`


			`def test_main_cli():`
			`"""Test CLI main function exists."""`
			`from self_grader import main`
			`assert callable(main)`


			`if __name__ == '__main__':`
			`pytest.main([__file__, '-v'])`