From 29fa9d50aad53dfe0eb7c2607125126d7024098e Mon Sep 17 00:00:00 2001
From: Alexander Whitestone
Date: Thu, 16 Apr 2026 01:47:32 +0000
Subject: [PATCH] test: Add normalize code blocks tests (#750)

---
 tests/test_normalize_code_blocks.py | 160 ++++++++++++++++++++++++++++
 1 file changed, 160 insertions(+)
 create mode 100644 tests/test_normalize_code_blocks.py

diff --git a/tests/test_normalize_code_blocks.py b/tests/test_normalize_code_blocks.py
new file mode 100644
index 00000000..b0a2fda7
--- /dev/null
+++ b/tests/test_normalize_code_blocks.py
@@ -0,0 +1,160 @@
+#!/usr/bin/env python3
+"""Tests for normalize-code-blocks.py — issue #750"""
+import json
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+# Import from scripts/
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent / "scripts"))
+from normalize_code_blocks import normalize_code_block, process_line, process_file, CODE_BLOCK_RE
+
+
+class TestCodeBlockRegex:
+    """CODE_BLOCK_RE: fenced blocks should match, inline backticks should not."""
+
+    def test_matches_python_block(self):
+        text = "```python\nprint('hi')\n```"
+        assert CODE_BLOCK_RE.search(text)
+
+    def test_matches_plain_block(self):
+        text = "```\nsome code\n```"
+        assert CODE_BLOCK_RE.search(text)
+
+    def test_matches_bash_block(self):
+        text = "```bash\necho hello\n```"
+        assert CODE_BLOCK_RE.search(text)
+
+    def test_ignores_inline_backticks(self):
+        text = "Use `code` inline"
+        assert not CODE_BLOCK_RE.search(text)
+
+    def test_handles_multiline_code(self):
+        text = "```python\ndef foo():\n return 1\n\ndef bar():\n return 2\n```"
+        match = CODE_BLOCK_RE.search(text)
+        assert match
+        assert "def foo" in match.group("code")
+
+
+class TestNormalizeCodeBlock:
+    """normalize_code_block applied to a match produced by CODE_BLOCK_RE."""
+
+    def test_strips_leading_indent(self):
+        match = CODE_BLOCK_RE.search("```python\n print('hi')\n```")
+        result = normalize_code_block(match)
+        assert " print" not in result
+        assert "print('hi')" in result
+
+    def test_dedents_mixed_indent(self):
+        code = "```python\n def foo():\n return 1\n def bar():\n return 2\n```"
+        match = CODE_BLOCK_RE.search(code)
+        result = normalize_code_block(match)
+        lines = result.split("\n")
+        # First non-tag line should have 0 indent
+        code_lines = [ln for ln in lines if ln.strip() and not ln.startswith("```")]
+        assert code_lines[0].startswith("def foo")
+
+    def test_strips_trailing_blank_lines(self):
+        match = CODE_BLOCK_RE.search("```python\nprint('hi')\n\n\n```")
+        result = normalize_code_block(match)
+        assert result.endswith("print('hi')\n```")
+
+    def test_preserves_language_tag(self):
+        match = CODE_BLOCK_RE.search("```python\n x = 1\n```")
+        result = normalize_code_block(match)
+        assert result.startswith("```python")
+
+    def test_empty_block_unchanged(self):
+        match = CODE_BLOCK_RE.search("```python\n \n```")
+        original = match.group(0)
+        result = normalize_code_block(match)
+        assert result == original
+
+    def test_diff_markers_preserved(self):
+        code = "```\n+def new_func():\n+ return 1\n-def old_func():\n- return 0\n```"
+        match = CODE_BLOCK_RE.search(code)
+        result = normalize_code_block(match)
+        assert "+def new_func" in result
+        assert "-def old_func" in result
+
+
+class TestProcessLine:
+    """process_line on a single line of text; it returns a (new_line, fixes) pair."""
+
+    def test_valid_json_no_code_blocks(self):
+        line = json.dumps({"prompt": "hello world"})
+        new_line, fixes = process_line(line)
+        assert fixes == 0
+
+    def test_valid_json_with_code_block(self):
+        obj = {"prompt": "Here is code:\n```python\n x = 1\n```"}
+        line = json.dumps(obj)
+        new_line, fixes = process_line(line)
+        assert fixes == 1
+        parsed = json.loads(new_line)
+        assert " x = 1" not in parsed["prompt"]
+
+    def test_nested_dict_code_blocks(self):
+        # NOTE(review): despite the name, obj is a flat dict with two string fields.
+        obj = {
+            "prompt": "code: ```python\n a = 1\n```",
+            "chosen": "```python\n b = 2\n```",
+        }
+        line = json.dumps(obj)
+        new_line, fixes = process_line(line)
+        assert fixes == 2
+
+    def test_invalid_json_returned_unchanged(self):
+        line = "{broken json"
+        new_line, fixes = process_line(line)
+        assert new_line == line
+        assert fixes == 0
+
+    def test_list_field_code_blocks(self):
+        obj = {"items": ["```python\n x = 1\n```", "no code here"]}
+        line = json.dumps(obj)
+        new_line, fixes = process_line(line)
+        assert fixes == 1
+
+
+class TestProcessFile:
+    """process_file on files under pytest's tmp_path; results are read by key."""
+
+    def test_fixes_file_in_place(self, tmp_path):
+        f = tmp_path / "test.jsonl"
+        lines = [
+            json.dumps({"prompt": "```python\n x = 1\n```"}),
+            json.dumps({"prompt": "no code"}),
+        ]
+        f.write_text("\n".join(lines) + "\n")
+
+        result = process_file(str(f))
+        assert result["fixes"] == 1
+        assert result["lines"] == 2
+
+        # Verify file was actually modified
+        content = f.read_text()
+        assert " x = 1" not in content
+
+    def test_dry_run_no_write(self, tmp_path):
+        f = tmp_path / "test.jsonl"
+        original = json.dumps({"prompt": "```python\n x = 1\n```"})
+        f.write_text(original + "\n")
+
+        result = process_file(str(f), dry_run=True)
+        assert result["fixes"] == 1
+
+        # File unchanged
+        assert f.read_text().strip() == original
+
+    def test_missing_file(self, tmp_path):
+        result = process_file(str(tmp_path / "nope.jsonl"))
+        assert "error" in result
+
+    def test_clean_file_no_fixes(self, tmp_path):
+        f = tmp_path / "clean.jsonl"
+        f.write_text(json.dumps({"prompt": "no code blocks here"}) + "\n")
+        result = process_file(str(f))
+        assert result["fixes"] == 0