#!/usr/bin/env python3
"""Tests for github_trending_scanner.py — pure function validation.

Tests the feature inference, extraction, and output formatting logic
without relying on external GitHub API calls.
"""

import json
import sys
import tempfile
from pathlib import Path

# Add scripts dir to path for import
sys.path.insert(0, str(Path(__file__).resolve().parent))

from github_trending_scanner import (
    extract_repo_features,
    infer_features,
    save_trending,
)


def test_infer_features_from_description():
    """Feature inference extracts capabilities from description text."""
    desc = "A local, quantized LLM framework for fine-tuning and agent-based RAG with vision."
    topics = ["ai", "llm"]
    features = infer_features(desc, topics)
    # Should include relevant capabilities (case-insensitive comparison)
    expected_lower = {
        "fine-tuning",
        "local/offline",
        "quantized models",
        "agent framework",
        "vision",
        "retrieval/rag",
    }
    actual_lower = {f.lower() for f in features}
    assert expected_lower.issubset(actual_lower), (
        f"Missing features. Expected subset of {expected_lower}, got {actual_lower}"
    )
    print("PASS: infer_features_from_description")


def test_infer_features_from_topics_only():
    """Topics alone can drive feature detection."""
    desc = ""
    topics = ["computer-vision", "speech", "pytorch"]
    features = infer_features(desc, topics)
    # Non-generic topics should appear as features (topics preserved as-is)
    assert "computer-vision" in features, f"Expected 'computer-vision' in {features}"
    assert "speech" in features, f"Expected 'speech' in {features}"
    # Generic topics (pytorch) may be filtered
    print(f"PASS: infer_features_from_topics_only → {features}")


def test_extract_repo_features_produces_valid_structure():
    """extract_repo_features returns all required fields."""
    mock_repo = {
        "full_name": "example/repo",
        "description": "An example repository",
        "stargazers_count": 1234,
        "forks_count": 56,
        "open_issues_count": 7,
        "language": "Python",
        "topics": ["ai", "llm"],
        "html_url": "https://github.com/example/repo",
        "created_at": "2025-01-01T00:00:00Z",
        "updated_at": "2026-01-01T00:00:00Z",
    }
    result = extract_repo_features(mock_repo)
    assert result["name"] == "example/repo"
    assert result["description"] == "An example repository"
    assert result["stars"] == 1234
    assert isinstance(result["key_features"], list)
    assert "scanned_at" in result
    assert result["url"] == "https://github.com/example/repo"
    print("PASS: extract_repo_features_structure")


def test_save_trending_creates_dated_json():
    """save_trending writes a valid JSON file with the expected schema."""
    repos = [
        {
            "name": "test/repo",
            "description": "Test repository",
            "stars": 999,
            "language": "Python",
            "topics": ["test"],
            "key_features": ["testing"],
            "scanned_at": "2026-04-26T00:00:00+00:00",
        }
    ]
    with tempfile.TemporaryDirectory() as tmp:
        output_file = save_trending(repos, output_dir=tmp)
        path = Path(output_file)
        assert path.exists(), f"Output file not created: {output_file}"
        with open(path) as f:
            data = json.load(f)
        assert "scanned_at" in data
        assert data["count"] == 1
        assert isinstance(data["repos"], list)
        assert data["repos"][0]["name"] == "test/repo"
        print(f"PASS: save_trending → {output_file}")


def test_save_trending_respects_output_dir_creation():
    """Output directory is created if it doesn't exist."""
    repos = []
    with tempfile.TemporaryDirectory() as tmp:
        nested = Path(tmp) / "nested" / "trending"
        assert not nested.exists()
        output_file = save_trending(repos, output_dir=str(nested))
        assert nested.exists()
        assert Path(output_file).exists()
        print("PASS: output_dir_creation")


if __name__ == "__main__":
    test_infer_features_from_description()
    test_infer_features_from_topics_only()
    test_extract_repo_features_produces_valid_structure()
    test_save_trending_creates_dated_json()
    test_save_trending_respects_output_dir_creation()
    print("\nAll github_trending_scanner tests passed.")