""" Tests for scripts/normalize-code-blocks.py — Code block indentation normalization. """ import json import unittest from pathlib import Path import sys sys.path.insert(0, str(Path(__file__).parent.parent / "scripts")) from normalize_code_blocks import process_line class TestProcessLine(unittest.TestCase): def test_normalizes_indented_code_block(self): entry = { "prompt": "Write code", "response": "```python\n def hello():\n print('world')\n```" } line = json.dumps(entry) result, count = process_line(line) parsed = json.loads(result.strip()) # Code block indentation should be normalized self.assertIn("def hello():", parsed["response"]) def test_preserves_non_code_content(self): entry = {"prompt": "Hello", "response": "How are you?"} line = json.dumps(entry) result, count = process_line(line) parsed = json.loads(result.strip()) self.assertEqual(parsed["response"], "How are you?") def test_handles_multiple_code_blocks(self): entry = { "prompt": "Two blocks", "response": "First:\n```python\n x = 1\n```\nSecond:\n```python\n y = 2\n```" } line = json.dumps(entry) result, count = process_line(line) parsed = json.loads(result.strip()) self.assertIn("x = 1", parsed["response"]) self.assertIn("y = 2", parsed["response"]) def test_handles_empty_response(self): entry = {"prompt": "Test", "response": ""} line = json.dumps(entry) result, count = process_line(line) parsed = json.loads(result.strip()) self.assertEqual(parsed["response"], "") def test_preserves_prompt(self): entry = {"prompt": "Write a function", "response": "```python\n def f(): pass\n```"} line = json.dumps(entry) result, count = process_line(line) parsed = json.loads(result.strip()) self.assertEqual(parsed["prompt"], "Write a function") if __name__ == "__main__": unittest.main()