diff --git a/tests/test_checkpoint_resumption.py b/tests/test_checkpoint_resumption.py
index 095397212..d294db7f7 100644
--- a/tests/test_checkpoint_resumption.py
+++ b/tests/test_checkpoint_resumption.py
@@ -105,6 +105,26 @@ def monitor_checkpoint_during_run(checkpoint_file: Path, duration: int = 30) ->
     return snapshots
 
 
+def _cleanup_test_artifacts(*paths):
+    """Remove test-generated files and directories (best effort).
+
+    Real directories are removed recursively; files and symlinks are
+    unlinked; paths that do not exist are skipped. Removal failures never
+    propagate, because this runs from ``finally`` blocks where a raised
+    exception would replace the test's own result.
+    """
+    for raw_path in paths:
+        path = Path(raw_path)
+        if path.is_dir() and not path.is_symlink():
+            shutil.rmtree(path, ignore_errors=True)
+            continue
+        try:
+            # unlink() also handles symlinks, which rmtree refuses.
+            path.unlink(missing_ok=True)
+        except OSError as exc:
+            print(f"Warning: could not remove {path}: {exc}")
+
+
 def test_current_implementation():
     """Test the current checkpoint implementation."""
     print("\n" + "=" * 70)
@@ -167,6 +187,8 @@ def test_current_implementation():
         print(f"❌ Error during run: {e}")
         traceback.print_exc()
         return False
+    finally:
+        _cleanup_test_artifacts(dataset_file, output_dir)
 
     elapsed = time.time() - start_time
 
@@ -220,9 +242,9 @@ def test_interruption_and_resume():
 
     print(f"\n▶️ Starting first run (will process 5 prompts, then simulate interruption)...")
 
+    temp_dataset = Path("tests/test_data/checkpoint_test_resume_partial.jsonl")
     try:
         # Create a modified dataset with only first 5 prompts for initial run
-        temp_dataset = Path("tests/test_data/checkpoint_test_resume_partial.jsonl")
         with open(dataset_file, 'r') as f:
             lines = f.readlines()[:5]
         with open(temp_dataset, 'w') as f:
@@ -292,6 +314,8 @@ def test_interruption_and_resume():
         print(f"❌ Error during test: {e}")
         traceback.print_exc()
         return False
+    finally:
+        _cleanup_test_artifacts(dataset_file, temp_dataset, output_dir)
 
 
 def test_simulated_crash():