Files
timmy-config/tests/test_generate_scenes_from_media.py
Alexander Whitestone a2e61f6def
Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 21s
Smoke Test / smoke (pull_request) Failing after 15s
Validate Config / YAML Lint (pull_request) Failing after 18s
Validate Config / JSON Validate (pull_request) Successful in 21s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m3s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 1m11s
Validate Config / Cron Syntax Check (pull_request) Successful in 15s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 14s
Validate Config / Playbook Schema Validation (pull_request) Successful in 27s
PR Checklist / pr-checklist (pull_request) Failing after 12m35s
Architecture Lint / Lint Repository (pull_request) Failing after 22s
feat: auto-generate scene descriptions from image/video assets (#689)
scripts/generate_scenes_from_media.py:
  Scans assets dir for images/videos (jpg/png/mp4/mov/etc)
  Calls vision model (llava/gpt-4/claude) to describe scenes
  Outputs training pairs: image_path -> scene description
  Includes provenance: model, timestamp, source_session_id
  --assets dir, --output file, --model, --max, --dry-run
  JSON parsing with fallback for plain text responses

tests/test_generate_scenes_from_media.py: 12 tests
  find_media_files: images, videos, max limit, missing dir
  file_hash: consistent, different files
  generate_prompt: image vs video
  parse_description: JSON, plain text
  generate_training_pair: structure, video type

Usage:
  python3 scripts/generate_scenes_from_media.py --assets ~/assets/
  python3 scripts/generate_scenes_from_media.py --assets ~/assets/ --model gpt-4
  python3 scripts/generate_scenes_from_media.py --assets ~/assets/ --dry-run
2026-04-21 07:22:28 -04:00

116 lines
3.8 KiB
Python

"""
Tests for scripts/generate_scenes_from_media.py — Media scene description generator.
"""
import json
import os
import tempfile
import unittest
from pathlib import Path
import sys
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
from generate_scenes_from_media import (
find_media_files,
file_hash,
generate_description_prompt,
parse_description,
generate_training_pair,
IMAGE_EXTENSIONS,
VIDEO_EXTENSIONS,
)
class TestFindMediaFiles(unittest.TestCase):
    """Discovery of media files under an assets directory."""

    def test_finds_images(self):
        # Only recognized image extensions count; the .txt file is skipped.
        with tempfile.TemporaryDirectory() as tmpdir:
            for name in ("test.jpg", "test.png", "test.txt"):
                Path(tmpdir, name).touch()
            self.assertEqual(len(find_media_files(tmpdir)), 2)

    def test_finds_videos(self):
        with tempfile.TemporaryDirectory() as tmpdir:
            for name in ("video.mp4", "video.mov"):
                Path(tmpdir, name).touch()
            self.assertEqual(len(find_media_files(tmpdir)), 2)

    def test_max_limits_results(self):
        # max_files caps how many entries are returned.
        with tempfile.TemporaryDirectory() as tmpdir:
            for i in range(10):
                Path(tmpdir, f"img{i}.jpg").touch()
            self.assertEqual(len(find_media_files(tmpdir, max_files=3)), 3)

    def test_missing_dir_returns_empty(self):
        # A nonexistent directory yields an empty list instead of raising.
        self.assertEqual(find_media_files("/nonexistent/path"), [])
class TestFileHash(unittest.TestCase):
    """Stability and uniqueness of the per-file hash."""

    def test_consistent_hash(self):
        # Hashing the same path twice must yield the same value.
        target = Path("/test/file.jpg")
        self.assertEqual(file_hash(target), file_hash(target))

    def test_different_files_different_hash(self):
        # Distinct paths should not collide.
        self.assertNotEqual(
            file_hash(Path("/test/a.jpg")),
            file_hash(Path("/test/b.jpg")),
        )
class TestGenerateDescriptionPrompt(unittest.TestCase):
    """Prompt wording varies with the media type of the input path."""

    def test_image_prompt(self):
        lowered = generate_description_prompt(Path("test.jpg")).lower()
        # Image prompts must ask about mood and colors.
        for needle in ("image", "mood", "colors"):
            self.assertIn(needle, lowered)

    def test_video_prompt(self):
        lowered = generate_description_prompt(Path("test.mp4")).lower()
        # Video prompts additionally cover camera movement.
        for needle in ("video", "camera movement"):
            self.assertIn(needle, lowered)
class TestParseDescription(unittest.TestCase):
    """Model responses parse as JSON, with a plain-text fallback."""

    def test_parses_json(self):
        # Adjacent string literals concatenate to the exact original payload.
        payload = (
            '{"mood": "calm", "colors": ["blue", "white"], '
            '"composition": "wide shot", "camera": "static", '
            '"description": "A serene lake"}'
        )
        parsed = parse_description(payload)
        self.assertEqual(parsed["mood"], "calm")
        self.assertEqual(parsed["colors"], ["blue", "white"])

    def test_handles_plain_text(self):
        # Non-JSON responses still produce a dict with a "description" key.
        raw = "This is a description of a scene with mood calm and colors blue, white."
        self.assertIn("description", parse_description(raw))
class TestGenerateTrainingPair(unittest.TestCase):
    """Structure and provenance fields of generated training pairs."""

    def test_pair_structure(self):
        source = Path("/test/photo.jpg")
        scene = {
            "mood": "happy",
            "colors": ["gold"],
            "composition": "close-up",
            "camera": "static",
            "description": "Smiling face",
        }
        pair = generate_training_pair(source, scene, "llava")
        self.assertEqual(pair["source_file"], str(source))
        self.assertEqual(pair["media_type"], "image")
        self.assertEqual(pair["model"], "llava")
        # Provenance metadata must always be attached.
        self.assertIn("source_session_id", pair)
        self.assertIn("timestamp", pair)
        self.assertEqual(pair["scene"]["mood"], "happy")

    def test_video_pair(self):
        # Video extensions are classified with media_type "video".
        pair = generate_training_pair(
            Path("/test/video.mp4"), {"mood": "energetic"}, "gpt-4"
        )
        self.assertEqual(pair["media_type"], "video")
if __name__ == "__main__":
unittest.main()