169 lines
5.2 KiB
Python
169 lines
5.2 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Smoke test for code duplication detector — verifies:
|
||
|
|
- Function extraction from Python files
|
||
|
|
- Exact duplicate detection
|
||
|
|
- Near-duplicate detection (token similarity)
|
||
|
|
- Report generation and stats
|
||
|
|
- JSON output format
|
||
|
|
"""
|
||
|
|
|
||
|
|
import json
|
||
|
|
import sys
|
||
|
|
import tempfile
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
SCRIPT_DIR = Path(__file__).parent.absolute()
|
||
|
|
sys.path.insert(0, str(SCRIPT_DIR))
|
||
|
|
|
||
|
|
from code_duplication_detector import (
|
||
|
|
extract_functions_from_file,
|
||
|
|
scan_directory,
|
||
|
|
find_duplicates,
|
||
|
|
generate_report,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
def test_extract_functions():
    """Check that extraction finds every function and method in a sample file.

    Writes a module containing two top-level functions and one method on a
    class into a temporary directory, runs ``extract_functions_from_file`` on
    it, and asserts that exactly three entries named ``foo``, ``bar`` and
    ``method`` are returned.
    """
    sample_source = '''
def foo():
    return 1

def bar():
    return 2

class MyClass:
    def method(self):
        return 3
'''
    with tempfile.TemporaryDirectory() as workdir:
        sample_path = Path(workdir, 'sample.py')
        sample_path.write_text(sample_source)

        extracted = extract_functions_from_file(str(sample_path))

        # Count first: a wrong count gives a clearer failure than a set diff.
        found_count = len(extracted)
        assert found_count == 3, f"Expected 3 functions, got {found_count}"

        # Each entry is indexed by its 'name' key; order is irrelevant here.
        found_names = set()
        for entry in extracted:
            found_names.add(entry['name'])
        assert found_names == {'foo', 'bar', 'method'}, f"Names mismatch: {found_names}"

        print(" [PASS] function extraction works")
|
||
|
|
|
||
|
|
|
||
|
|
def test_exact_duplicate_detection():
    """Check that the same function in two files is reported as a duplicate.

    Writes an identical ``duplicated`` function to ``a.py`` and ``b.py`` in a
    temporary directory, scans it, and asserts that ``find_duplicates``
    reports an ``exact_dupe_count`` of at least 1 and at least one entry in
    ``exact_duplicates``.
    """
    shared_source = '''
def duplicated():
    x = 1
    y = 2
    return x + y
'''
    with tempfile.TemporaryDirectory() as tmpdir:
        # Same text goes into both files so the two functions match exactly.
        for filename in ('a.py', 'b.py'):
            Path(tmpdir, filename).write_text(shared_source)

        results = find_duplicates(scan_directory(tmpdir))

        exact_count = results['stats']['exact_dupe_count']
        assert exact_count >= 1, f"Expected exact duplicate, got count={exact_count}"

        duplicate_groups = results['exact_duplicates']
        assert len(duplicate_groups) >= 1, "Should have at least one duplicate group"

        print(" [PASS] exact duplicate detection works")
|
||
|
|
|
||
|
|
|
||
|
|
def test_unique_functions_not_flagged():
    """Check that two different functions are not reported as exact duplicates.

    Writes two one-line functions with different names and return values
    into separate files, scans the directory, and asserts that both the
    ``exact_dupe_count`` statistic and the ``exact_duplicates`` collection
    are empty.
    """
    distinct_sources = (
        ('a.py', 'def func_a(): return 1'),
        ('b.py', 'def func_b(): return 2'),
    )
    with tempfile.TemporaryDirectory() as tmpdir:
        for filename, code in distinct_sources:
            Path(tmpdir, filename).write_text(code)

        scanned = scan_directory(tmpdir)
        outcome = find_duplicates(scanned)

        # The counter and the group list must agree that nothing matched.
        assert outcome['stats']['exact_dupe_count'] == 0
        assert len(outcome['exact_duplicates']) == 0

        print(" [PASS] unique functions not flagged as duplicates")
|
||
|
|
|
||
|
|
|
||
|
|
def test_duplication_percentage_calculated():
    """Check that the stats include a positive duplication percentage.

    Each of two files holds an identical ``common`` function and one function
    unique to that file. After scanning, the ``stats`` mapping returned by
    ``find_duplicates`` must contain ``duplication_percentage`` and its value
    must be greater than zero.
    """
    source_a = '''
def common():
    x = 1
    y = 2
    return x + y

def unique1():
    return 100
'''
    source_b = '''
def common():
    x = 1
    y = 2
    return x + y

def unique2():
    return 200
'''
    with tempfile.TemporaryDirectory() as tmpdir:
        base = Path(tmpdir)
        (base / 'a.py').write_text(source_a)
        (base / 'b.py').write_text(source_b)

        stats = find_duplicates(scan_directory(tmpdir))['stats']

        assert 'duplication_percentage' in stats

        # NOTE(review): the exact figure depends on how the detector counts
        # lines, which is not visible from this file, so only a positive
        # value is required here.
        percentage = stats['duplication_percentage']
        assert percentage > 0

        print(f" [PASS] duplication percentage computed: {percentage}%")
|
||
|
|
|
||
|
|
|
||
|
|
def test_report_output_format():
    """Check the text and JSON renderings produced by ``generate_report``.

    Two files with the same one-line function are scanned and analysed.
    The text report must contain the ``CODE DUPLICATION REPORT`` heading and
    a ``Total functions`` line. The JSON report must parse with
    ``json.loads`` and contain the ``stats`` and ``exact_duplicates`` keys.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        for filename in ('a.py', 'b.py'):
            Path(tmpdir, filename).write_text('def dup(): return 1')

        results = find_duplicates(scan_directory(tmpdir))

        # Plain-text rendering: look for the fixed heading and summary label.
        text_report = generate_report(results, output_format='text')
        for marker in ('CODE DUPLICATION REPORT', 'Total functions'):
            assert marker in text_report
        print(" [PASS] text report format valid")

        # JSON rendering: must be parseable and carry the top-level keys.
        parsed = json.loads(generate_report(results, output_format='json'))
        for key in ('stats', 'exact_duplicates'):
            assert key in parsed
        print(" [PASS] JSON report format valid")
|
||
|
|
|
||
|
|
|
||
|
|
def test_scan_directory_recursive():
    """Check that ``scan_directory`` picks up files in subdirectories.

    Creates ``root.py`` at the top of a temporary directory and ``nested.py``
    inside a ``sub`` subdirectory, then asserts that functions from both
    files appear in the scan results.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        top = Path(tmpdir)
        nested_dir = top / 'sub'
        nested_dir.mkdir()
        (nested_dir / 'nested.py').write_text('def nested(): pass')
        (top / 'root.py').write_text('def root(): pass')

        found_names = set()
        for entry in scan_directory(tmpdir):
            found_names.add(entry['name'])

        # Both the top-level and the nested function must be present.
        assert {'nested', 'root'} <= found_names

        print(" [PASS] recursive directory scanning works")
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == '__main__':
    # Script entry point: run each smoke test in order. A failing assertion
    # propagates and stops the run, so the final message is printed only
    # when every test has passed.
    print("Running code duplication detector smoke tests...")
    for smoke_test in (
        test_extract_functions,
        test_exact_duplicate_detection,
        test_unique_functions_not_flagged,
        test_duplication_percentage_calculated,
        test_report_output_format,
        test_scan_directory_recursive,
    ):
        smoke_test()
    print("\nAll tests passed.")
|