#!/usr/bin/env python3
"""Tests for normalize-code-blocks.py — issue #750"""

import json
import sys
import tempfile
from pathlib import Path

import pytest

# Import from scripts/
sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
from normalize_code_blocks import (
    CODE_BLOCK_RE,
    normalize_code_block,
    process_file,
    process_line,
)


class TestCodeBlockRegex:
    """What CODE_BLOCK_RE does and does not match."""

    def test_matches_python_block(self):
        """A fence tagged ``python`` is matched."""
        text = "```python\nprint('hi')\n```"
        assert CODE_BLOCK_RE.search(text)

    def test_matches_plain_block(self):
        """A fence without a language tag is matched."""
        text = "```\nsome code\n```"
        assert CODE_BLOCK_RE.search(text)

    def test_matches_bash_block(self):
        """A fence tagged ``bash`` is matched."""
        text = "```bash\necho hello\n```"
        assert CODE_BLOCK_RE.search(text)

    def test_ignores_inline_backticks(self):
        """Single-backtick inline code is not a fenced block."""
        text = "Use `code` inline"
        assert not CODE_BLOCK_RE.search(text)

    def test_handles_multiline_code(self):
        """A multi-line body is exposed through the ``code`` group."""
        text = "```python\ndef foo():\n return 1\n\ndef bar():\n return 2\n```"
        match = CODE_BLOCK_RE.search(text)
        assert match
        assert "def foo" in match.group("code")


class TestNormalizeCodeBlock:
    """Behaviour of normalize_code_block on a single regex match."""

    def test_strips_leading_indent(self):
        """Leading indentation on the code line is removed."""
        match = CODE_BLOCK_RE.search("```python\n print('hi')\n```")
        result = normalize_code_block(match)
        assert " print" not in result
        assert "print('hi')" in result

    def test_dedents_mixed_indent(self):
        """The shallowest line of a block with two indent levels ends at 0."""
        # ``def`` lines sit at 4 spaces and their bodies at 8, so the block
        # has a genuine common 4-space indent for the normalizer to strip.
        code = (
            "```python\n"
            "    def foo():\n"
            "        return 1\n"
            "    def bar():\n"
            "        return 2\n"
            "```"
        )
        match = CODE_BLOCK_RE.search(code)
        result = normalize_code_block(match)
        # First non-tag line should have 0 indent
        code_lines = [
            line
            for line in result.split("\n")
            if line.strip() and not line.startswith("```")
        ]
        assert code_lines[0].startswith("def foo")

    def test_strips_trailing_blank_lines(self):
        """Blank lines before the closing fence are dropped."""
        match = CODE_BLOCK_RE.search("```python\nprint('hi')\n\n\n```")
        result = normalize_code_block(match)
        assert result.endswith("print('hi')\n```")

    def test_preserves_language_tag(self):
        """The opening fence keeps its language tag."""
        match = CODE_BLOCK_RE.search("```python\n x = 1\n```")
        result = normalize_code_block(match)
        assert result.startswith("```python")

    def test_empty_block_unchanged(self):
        """A whitespace-only block comes back exactly as matched."""
        match = CODE_BLOCK_RE.search("```python\n \n```")
        original = match.group(0)
        result = normalize_code_block(match)
        assert result == original

    def test_diff_markers_preserved(self):
        """Leading ``+``/``-`` diff markers survive normalization."""
        code = "```\n+def new_func():\n+ return 1\n-def old_func():\n- return 0\n```"
        match = CODE_BLOCK_RE.search(code)
        result = normalize_code_block(match)
        assert "+def new_func" in result
        assert "-def old_func" in result


class TestProcessLine:
    """Behaviour of process_line on one JSONL line."""

    def test_valid_json_no_code_blocks(self):
        """A record without fenced code reports zero fixes."""
        line = json.dumps({"prompt": "hello world"})
        _, fixes = process_line(line)
        assert fixes == 0

    def test_valid_json_with_code_block(self):
        """An indented block in a string field is fixed and counted once."""
        obj = {"prompt": "Here is code:\n```python\n x = 1\n```"}
        line = json.dumps(obj)
        new_line, fixes = process_line(line)
        assert fixes == 1
        parsed = json.loads(new_line)
        assert " x = 1" not in parsed["prompt"]

    def test_nested_dict_code_blocks(self):
        """Blocks in two different fields of one record are both counted."""
        obj = {
            "prompt": "code: ```python\n a = 1\n```",
            "chosen": "```python\n b = 2\n```",
        }
        line = json.dumps(obj)
        _, fixes = process_line(line)
        assert fixes == 2

    def test_invalid_json_returned_unchanged(self):
        """Unparseable input is passed through with zero fixes."""
        line = "{broken json"
        new_line, fixes = process_line(line)
        assert new_line == line
        assert fixes == 0

    def test_list_field_code_blocks(self):
        """Strings inside a list-valued field are processed too."""
        obj = {"items": ["```python\n x = 1\n```", "no code here"]}
        line = json.dumps(obj)
        _, fixes = process_line(line)
        assert fixes == 1


class TestProcessFile:
    """Behaviour of process_file on JSONL files under ``tmp_path``."""

    def test_fixes_file_in_place(self, tmp_path):
        """Fixes are written back and the summary reports fixes and lines."""
        f = tmp_path / "test.jsonl"
        lines = [
            json.dumps({"prompt": "```python\n x = 1\n```"}),
            json.dumps({"prompt": "no code"}),
        ]
        f.write_text("\n".join(lines) + "\n")
        result = process_file(str(f))
        assert result["fixes"] == 1
        assert result["lines"] == 2
        # Verify file was actually modified
        content = f.read_text()
        assert " x = 1" not in content

    def test_dry_run_no_write(self, tmp_path):
        """With ``dry_run=True`` fixes are counted but the file is untouched."""
        f = tmp_path / "test.jsonl"
        original = json.dumps({"prompt": "```python\n x = 1\n```"})
        f.write_text(original + "\n")
        result = process_file(str(f), dry_run=True)
        assert result["fixes"] == 1
        # File unchanged
        assert f.read_text().strip() == original

    def test_missing_file(self, tmp_path):
        """A nonexistent path yields a result containing an ``error`` key."""
        result = process_file(str(tmp_path / "nope.jsonl"))
        assert "error" in result

    def test_clean_file_no_fixes(self, tmp_path):
        """A file with no fenced code reports zero fixes."""
        f = tmp_path / "clean.jsonl"
        f.write_text(json.dumps({"prompt": "no code blocks here"}) + "\n")
        result = process_file(str(f))
        assert result["fixes"] == 0