src/tools/image_tools.py

"""Image generation tools — Pixel persona.

Uses FLUX.2 Klein 4B (or configurable model) via HuggingFace diffusers
for text-to-image generation, storyboard frames, and variations.

All heavy imports are lazy so the module loads instantly even without
a GPU or the ``creative`` extra installed.
"""

from __future__ import annotations

import json
import logging
import uuid
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Lazy-loaded pipeline singleton
_pipeline = None


def _get_pipeline():
    """Lazy-load the FLUX diffusers pipeline."""
    global _pipeline
    if _pipeline is not None:
        return _pipeline

    try:
        import torch
        from diffusers import FluxPipeline
    except ImportError:
        raise ImportError(
            "Creative dependencies not installed. "
            "Run: pip install 'timmy-time[creative]'"
        )

    from config import settings

    model_id = getattr(settings, "flux_model_id", "black-forest-labs/FLUX.1-schnell")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32

    logger.info("Loading image model %s on %s …", model_id, device)
    _pipeline = FluxPipeline.from_pretrained(
        model_id, torch_dtype=dtype,
    ).to(device)
    logger.info("Image model loaded.")
    return _pipeline


def _output_dir() -> Path:
    from config import settings
    d = Path(getattr(settings, "image_output_dir", "data/images"))
    d.mkdir(parents=True, exist_ok=True)
    return d


def _save_metadata(image_path: Path, meta: dict) -> Path:
    meta_path = image_path.with_suffix(".json")
    meta_path.write_text(json.dumps(meta, indent=2))
    return meta_path


# ── Public tools ──────────────────────────────────────────────────────────────

def generate_image(
    prompt: str,
    negative_prompt: str = "",
    width: int = 1024,
    height: int = 1024,
    steps: int = 4,
    seed: Optional[int] = None,
) -> dict:
    """Generate an image from a text prompt.

    Returns dict with ``path``, ``width``, ``height``, and ``prompt``.
    """
    pipe = _get_pipeline()
    import torch

    generator = torch.Generator(device=pipe.device)
    if seed is not None:
        generator.manual_seed(seed)

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt or None,
        width=width,
        height=height,
        num_inference_steps=steps,
        generator=generator,
    ).images[0]

    uid = uuid.uuid4().hex[:12]
    out_path = _output_dir() / f"{uid}.png"
    image.save(out_path)

    meta = {
        "id": uid, "prompt": prompt, "negative_prompt": negative_prompt,
        "width": width, "height": height, "steps": steps, "seed": seed,
    }
    _save_metadata(out_path, meta)

    return {"success": True, "path": str(out_path), **meta}


def generate_storyboard(
    scenes: list[str],
    width: int = 1024,
    height: int = 576,
    steps: int = 4,
) -> dict:
    """Generate a storyboard: one keyframe image per scene description.

    Args:
        scenes: List of scene description strings.

    Returns dict with list of generated frame paths.
    """
    frames = []
    for i, scene in enumerate(scenes):
        result = generate_image(
            prompt=scene, width=width, height=height, steps=steps,
        )
        result["scene_index"] = i
        result["scene_description"] = scene
        frames.append(result)
    return {"success": True, "frame_count": len(frames), "frames": frames}


def image_variations(
    prompt: str,
    count: int = 4,
    width: int = 1024,
    height: int = 1024,
    steps: int = 4,
) -> dict:
    """Generate multiple variations of the same prompt with different seeds."""
    import random
    variations = []
    for _ in range(count):
        seed = random.randint(0, 2**32 - 1)
        result = generate_image(
            prompt=prompt, width=width, height=height,
            steps=steps, seed=seed,
        )
        variations.append(result)
    return {"success": True, "count": len(variations), "variations": variations}


# ── Tool catalogue ────────────────────────────────────────────────────────────

IMAGE_TOOL_CATALOG: dict[str, dict] = {
    "generate_image": {
        "name": "Generate Image",
        "description": "Generate an image from a text prompt using FLUX",
        "fn": generate_image,
    },
    "generate_storyboard": {
        "name": "Generate Storyboard",
        "description": "Generate keyframe images for a sequence of scenes",
        "fn": generate_storyboard,
    },
    "image_variations": {
        "name": "Image Variations",
        "description": "Generate multiple variations of the same prompt",
        "fn": image_variations,
    },
}
feat: add full creative studio + DevOps tools (Pixel, Lyra, Reel personas) Adds 3 new personas (Pixel, Lyra, Reel) and 5 new tool modules: - Git/DevOps tools (GitPython): clone, status, diff, log, blame, branch, add, commit, push, pull, stash — wired to Forge and Helm personas - Image generation (FLUX via diffusers): text-to-image, storyboards, variations — Pixel persona - Music generation (ACE-Step 1.5): full songs with vocals+instrumentals, instrumental tracks, vocal-only tracks — Lyra persona - Video generation (Wan 2.1 via diffusers): text-to-video, image-to-video clips — Reel persona - Creative Director pipeline: multi-step orchestration that chains storyboard → music → video → assembly into 3+ minute final videos - Video assembler (MoviePy + FFmpeg): stitch clips, overlay audio, title cards, subtitles, final export Also includes: - Spark Intelligence tool-level + creative pipeline event capture - Creative Studio dashboard page (/creative/ui) with 4 tabs - Config settings for all new models and output directories - pyproject.toml creative optional extra for GPU dependencies - 107 new tests covering all modules (624 total, all passing) https://claude.ai/code/session_01KJm6jQkNi3aA3yoQJn636c 2026-02-24 16:31:47 +00:00			`"""Image generation tools — Pixel persona.`

			`Uses FLUX.2 Klein 4B (or configurable model) via HuggingFace diffusers`
			`for text-to-image generation, storyboard frames, and variations.`

			`All heavy imports are lazy so the module loads instantly even without`
			a GPU or the ``creative`` extra installed.
			`"""`

			`from __future__ import annotations`

			`import json`
			`import logging`
			`import uuid`
			`from pathlib import Path`
			`from typing import Optional`

			`logger = logging.getLogger(__name__)`

			`# Lazy-loaded pipeline singleton`
			`_pipeline = None`


			`def _get_pipeline():`
			`"""Lazy-load the FLUX diffusers pipeline."""`
			`global _pipeline`
			`if _pipeline is not None:`
			`return _pipeline`

			`try:`
			`import torch`
			`from diffusers import FluxPipeline`
			`except ImportError:`
			`raise ImportError(`
			`"Creative dependencies not installed. "`
			`"Run: pip install 'timmy-time[creative]'"`
			`)`

			`from config import settings`

			`model_id = getattr(settings, "flux_model_id", "black-forest-labs/FLUX.1-schnell")`
			`device = "cuda" if torch.cuda.is_available() else "cpu"`
			`dtype = torch.float16 if device == "cuda" else torch.float32`

			`logger.info("Loading image model %s on %s …", model_id, device)`
			`_pipeline = FluxPipeline.from_pretrained(`
			`model_id, torch_dtype=dtype,`
			`).to(device)`
			`logger.info("Image model loaded.")`
			`return _pipeline`


			`def _output_dir() -> Path:`
			`from config import settings`
			`d = Path(getattr(settings, "image_output_dir", "data/images"))`
			`d.mkdir(parents=True, exist_ok=True)`
			`return d`


			`def _save_metadata(image_path: Path, meta: dict) -> Path:`
			`meta_path = image_path.with_suffix(".json")`
			`meta_path.write_text(json.dumps(meta, indent=2))`
			`return meta_path`


			`# ── Public tools ──────────────────────────────────────────────────────────────`

			`def generate_image(`
			`prompt: str,`
			`negative_prompt: str = "",`
			`width: int = 1024,`
			`height: int = 1024,`
			`steps: int = 4,`
			`seed: Optional[int] = None,`
			`) -> dict:`
			`"""Generate an image from a text prompt.`

			Returns dict with ``path``, ``width``, ``height``, and ``prompt``.
			`"""`
			`pipe = _get_pipeline()`
			`import torch`

			`generator = torch.Generator(device=pipe.device)`
			`if seed is not None:`
			`generator.manual_seed(seed)`

			`image = pipe(`
			`prompt=prompt,`
			`negative_prompt=negative_prompt or None,`
			`width=width,`
			`height=height,`
			`num_inference_steps=steps,`
			`generator=generator,`
			`).images[0]`

			`uid = uuid.uuid4().hex[:12]`
			`out_path = _output_dir() / f"{uid}.png"`
			`image.save(out_path)`

			`meta = {`
			`"id": uid, "prompt": prompt, "negative_prompt": negative_prompt,`
			`"width": width, "height": height, "steps": steps, "seed": seed,`
			`}`
			`_save_metadata(out_path, meta)`

			`return {"success": True, "path": str(out_path), **meta}`


			`def generate_storyboard(`
			`scenes: list[str],`
			`width: int = 1024,`
			`height: int = 576,`
			`steps: int = 4,`
			`) -> dict:`
			`"""Generate a storyboard: one keyframe image per scene description.`

			`Args:`
			`scenes: List of scene description strings.`

			`Returns dict with list of generated frame paths.`
			`"""`
			`frames = []`
			`for i, scene in enumerate(scenes):`
			`result = generate_image(`
			`prompt=scene, width=width, height=height, steps=steps,`
			`)`
			`result["scene_index"] = i`
			`result["scene_description"] = scene`
			`frames.append(result)`
			`return {"success": True, "frame_count": len(frames), "frames": frames}`


			`def image_variations(`
			`prompt: str,`
			`count: int = 4,`
			`width: int = 1024,`
			`height: int = 1024,`
			`steps: int = 4,`
			`) -> dict:`
			`"""Generate multiple variations of the same prompt with different seeds."""`
			`import random`
			`variations = []`
			`for _ in range(count):`
			`seed = random.randint(0, 2**32 - 1)`
			`result = generate_image(`
			`prompt=prompt, width=width, height=height,`
			`steps=steps, seed=seed,`
			`)`
			`variations.append(result)`
			`return {"success": True, "count": len(variations), "variations": variations}`


			`# ── Tool catalogue ────────────────────────────────────────────────────────────`

			`IMAGE_TOOL_CATALOG: dict[str, dict] = {`
			`"generate_image": {`
			`"name": "Generate Image",`
			`"description": "Generate an image from a text prompt using FLUX",`
			`"fn": generate_image,`
			`},`
			`"generate_storyboard": {`
			`"name": "Generate Storyboard",`
			`"description": "Generate keyframe images for a sequence of scenes",`
			`"fn": generate_storyboard,`
			`},`
			`"image_variations": {`
			`"name": "Image Variations",`
			`"description": "Generate multiple variations of the same prompt",`
			`"fn": image_variations,`
			`},`
			`}`