#!/usr/bin/env python3
"""
Smoke test for code duplication detector — verifies:
- Function extraction from Python files
- Exact duplicate detection
- Unique functions are not reported as duplicates
- Duplication percentage stat is present and positive
- Report generation (text and JSON output formats)
- Recursive directory scanning

Near-duplicate detection (token similarity) is NOT exercised by these tests.

Run directly (``python <this file>``); the process exits non-zero if any
test fails.
"""

import json
import sys
import tempfile
import textwrap
from pathlib import Path

# Make the detector importable when this script is run from another directory.
SCRIPT_DIR = Path(__file__).parent.absolute()
sys.path.insert(0, str(SCRIPT_DIR))

from code_duplication_detector import (  # noqa: E402  (needs sys.path tweak above)
    extract_functions_from_file,
    scan_directory,
    find_duplicates,
    generate_report,
)


def _write_sources(root, sources):
    """Write fixture files under *root*.

    Args:
        root: Directory (str or Path) that receives the files.
        sources: Mapping of relative file path -> source text. The text is
            passed through ``textwrap.dedent`` so fixtures can be written as
            indented triple-quoted literals inside the tests.

    Returns:
        Dict mapping each relative path to the ``Path`` that was written.
    """
    written = {}
    for rel_path, source in sources.items():
        target = Path(root) / rel_path
        # Allow nested fixtures such as 'sub/nested.py'.
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(textwrap.dedent(source), encoding='utf-8')
        written[rel_path] = target
    return written


def test_extract_functions():
    """Test that function extraction works."""
    with tempfile.TemporaryDirectory() as tmpdir:
        paths = _write_sources(tmpdir, {
            'sample.py': '''
                def foo():
                    return 1

                def bar():
                    return 2

                class MyClass:
                    def method(self):
                        return 3
            ''',
        })

        functions = extract_functions_from_file(str(paths['sample.py']))

        # Two module-level functions plus one method are expected.
        assert len(functions) == 3, f"Expected 3 functions, got {len(functions)}"
        names = {f['name'] for f in functions}
        assert names == {'foo', 'bar', 'method'}, f"Names mismatch: {names}"
    print(" [PASS] function extraction works")


def test_exact_duplicate_detection():
    """Test that identical functions are flagged as duplicates."""
    duplicated_source = '''
        def duplicated():
            x = 1
            y = 2
            return x + y
    '''
    with tempfile.TemporaryDirectory() as tmpdir:
        # Two files containing the very same function.
        _write_sources(tmpdir, {'a.py': duplicated_source, 'b.py': duplicated_source})

        functions = scan_directory(tmpdir)
        results = find_duplicates(functions)

        stats = results['stats']
        assert stats['exact_dupe_count'] >= 1, f"Expected exact duplicate, got count={stats['exact_dupe_count']}"
        assert len(results['exact_duplicates']) >= 1, "Should have at least one duplicate group"
    print(" [PASS] exact duplicate detection works")


def test_unique_functions_not_flagged():
    """Test that different functions are not flagged as duplicates."""
    with tempfile.TemporaryDirectory() as tmpdir:
        _write_sources(tmpdir, {
            'a.py': 'def func_a(): return 1',
            'b.py': 'def func_b(): return 2',
        })

        functions = scan_directory(tmpdir)
        results = find_duplicates(functions)

        dupe_count = results['stats']['exact_dupe_count']
        assert dupe_count == 0, f"Expected no exact duplicates, got count={dupe_count}"
        assert len(results['exact_duplicates']) == 0, (
            f"Expected no duplicate groups, got {len(results['exact_duplicates'])}"
        )
    print(" [PASS] unique functions not flagged as duplicates")


def test_duplication_percentage_calculated():
    """Test that duplication percentage is computed."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Each file holds one shared function and one unique function.
        _write_sources(tmpdir, {
            'a.py': '''
                def common():
                    x = 1
                    y = 2
                    return x + y

                def unique1():
                    return 100
            ''',
            'b.py': '''
                def common():
                    x = 1
                    y = 2
                    return x + y

                def unique2():
                    return 200
            ''',
        })

        functions = scan_directory(tmpdir)
        results = find_duplicates(functions)

        stats = results['stats']
        assert 'duplication_percentage' in stats, "stats is missing 'duplication_percentage'"
        # The exact figure depends on how the detector counts lines, which is
        # not visible from here, so only require that the shared function
        # registers as some duplication.
        assert stats['duplication_percentage'] > 0, (
            f"Expected positive duplication percentage, got {stats['duplication_percentage']}"
        )
    print(f" [PASS] duplication percentage computed: {stats['duplication_percentage']}%")


def test_report_output_format():
    """Test that report output is valid."""
    with tempfile.TemporaryDirectory() as tmpdir:
        _write_sources(tmpdir, {
            'a.py': 'def dup(): return 1',
            'b.py': 'def dup(): return 1',
        })

        functions = scan_directory(tmpdir)
        results = find_duplicates(functions)

        # Text report: check for the expected heading and summary label.
        text = generate_report(results, output_format='text')
        assert 'CODE DUPLICATION REPORT' in text, "Text report is missing its title"
        assert 'Total functions' in text, "Text report is missing 'Total functions'"
        print(" [PASS] text report format valid")

        # JSON report: must parse and expose the top-level result keys.
        json_out = generate_report(results, output_format='json')
        data = json.loads(json_out)
        assert 'stats' in data, "JSON report is missing 'stats'"
        assert 'exact_duplicates' in data, "JSON report is missing 'exact_duplicates'"
        print(" [PASS] JSON report format valid")


def test_scan_directory_recursive():
    """Test that nested directories are scanned."""
    with tempfile.TemporaryDirectory() as tmpdir:
        _write_sources(tmpdir, {
            'sub/nested.py': 'def nested(): pass',
            'root.py': 'def root(): pass',
        })

        functions = scan_directory(tmpdir)
        names = {f['name'] for f in functions}
        assert 'nested' in names and 'root' in names, f"Names mismatch: {names}"
    print(" [PASS] recursive directory scanning works")


def main():
    """Run every smoke test and return a process exit code.

    Each test runs even if an earlier one fails, so a single failure does not
    hide the outcome of the rest. Only ``AssertionError`` is treated as a test
    failure; any other exception propagates.

    Returns:
        0 when all tests pass, 1 otherwise.
    """
    print("Running code duplication detector smoke tests...")
    tests = (
        test_extract_functions,
        test_exact_duplicate_detection,
        test_unique_functions_not_flagged,
        test_duplication_percentage_calculated,
        test_report_output_format,
        test_scan_directory_recursive,
    )
    failures = 0
    for test in tests:
        try:
            test()
        except AssertionError as exc:
            failures += 1
            print(f" [FAIL] {test.__name__}: {exc}")

    if failures:
        print(f"\n{failures} test(s) failed.")
        return 1
    print("\nAll tests passed.")
    return 0


if __name__ == '__main__':
    sys.exit(main())