feat(training): generate 1K lyrics→visual scene descriptions (#576)

Add a batch generator script that produces 200 synthetic songs × 5 beats each (1000 entries). Output is written to both ~/.hermes/training-data/scene-descriptions.jsonl and training-data/scene-descriptions.jsonl. Script: scripts/generate-scene-descriptions-1k.py. Data: training-data/scene-descriptions.jsonl (1000 valid entries). Closes #576.
2026-04-26 03:32:36 -04:00
parent 52510e5ab3
commit 82ea9da121
2 changed files with 1167 additions and 0 deletions
--- a/scripts/generate-scene-descriptions-1k.py
+++ b/scripts/generate-scene-descriptions-1k.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+"""
+generate-scene-descriptions-1k.py — Training Factory: 1K Lyrics→Visual Scenes.
+
+Issue: timmy-config #576
+Output: ~/.hermes/training-data/scene-descriptions.jsonl
+        training-data/scene-descriptions.jsonl (repo copy)
+
+Generates 200 synthetic songs × 5 beats each = 1000 scene description entries.
+Each entry maps a lyric line to a visual scene with mood, colors, composition, camera.
+"""
+
+import json
+import random
+import re
+from pathlib import Path
+
+random.seed(42)  # Reproducible
+
+# Emotional tones; make_scene() draws exactly one per scene.
+MOODS = [
+    "hopeful", "melancholic", "euphoric", "anxious", "tender", "intimate",
+    "nostalgic", "yearning", "peaceful", "tense", "bittersweet", "lonely",
+    "defiant", "ecstatic", "despairing", "wistful", "furious", "serene",
+    "claustrophobic", "liberating", "mysterious", "romantic", "guarded",
+    "aching", "resigned", "obsessive", "playful", "haunting", "vulnerable",
+    "rebellious", "dreamy", "gritty", "ethereal", "somber", "electric"
+]
+
+# Colour names; sampled for scene palettes and also used by a lyric template.
+COLORS_POOL = [
+    "midnight blue", "electric pink", "neon orange", "deep navy", "coral",
+    "amber", "violet", "teal", "gold", "silver", "chrome", "ivory",
+    "charcoal", "burnt sienna", "slate", "espresso", "cream", "rust",
+    "lavender", "mint green", "sunset pink", "steel blue", "warm white",
+    "shadow grey", "blood red", "forest green", "ocean blue", "sand beige",
+    "popsicle orange", "ice blue", "porcelain", "frost", "radiator white",
+    "subway silver", "tunnel black", "neon green", "pixel red", "screen blue"
+]
+
+# Shot framings; make_scene() draws exactly one per scene.
+# "steadicam" is spelled the same way as in CAMERAS so the two vocabularies
+# agree on a single token for the same rig.
+COMPOSITIONS = [
+    "wide shot", "medium close-up", "close-up", "extreme close-up", "two-shot",
+    "low angle", "high angle", "overhead", "profile", "silhouette",
+    "establishing shot", "intimate", "dynamic tracking", "static", "handheld",
+    "steadicam", "dolly in", "dolly out", "pan right", "pan left",
+    "tilt up", "tilt down", "zoom in", "zoom out", "crane up", "crane down",
+    "rack focus", "follow focus", "slow push", "pull back", "fish-eye", "split frame"
+]
+
+# Camera movements; make_scene() draws exactly one per scene.
+CAMERAS = [
+    "static", "slow pan", "handheld", "steadicam", "dolly zoom",
+    "rack focus", "tracking", "crane", "tilt", "push in",
+    "pull out", "whip pan", "glide", "float", "smooth follow",
+    "shaky", "locked off", "swing", "arc", "orbit"
+]
+
+# Lyric fragments: word pools that make_lyric() combines into lyric lines.
+NOUNS = ["shadow", "light", "heart", "rain", "city", "night", "memory", "silence",
+         "fire", "wind", "ocean", "sky", "street", "door", "window", "mirror",
+         "ghost", "echo", "promise", "lie", "fear", "hope", "dream", "pain"]
+VERBS = ["falls", "rises", "cracks", "burns", "whispers", "screams", "dances",
+         "shivers", "breaks", "cries", "laughs", "holds", "lets go", "fights",
+         "surrenders", "runs", "waits", "searches", "hides", "remembers"]
+# Every place already carries its article ("the ..."). Entries are unique so
+# random.choice() draws each place with equal probability.
+PLACES = ["the dark", "the rain", "the street", "the room", "the sky", "the void",
+          "the forest", "the ocean", "the desert", "the city", "the hallway"]
+ADJECTIVES = ["cold", "warm", "bright", "dark", "silent", "loud", "empty", "heavy",
+              "soft", "sharp", "bitter", "sweet", "lost", "found", "broken", "whole"]
+
+def make_lyric():
+    """Return one randomly assembled lyric line.
+
+    All templates are rendered on every call, each consuming draws from the
+    module-level ``random`` state, and one rendered line is then picked at
+    random. The number and order of draws is kept fixed so that seeded runs
+    remain reproducible.
+    """
+    patterns = [
+        f"The {random.choice(NOUNS)} {random.choice(VERBS)} in {random.choice(PLACES)}",
+        f"I see the {random.choice(COLORS_POOL)} {random.choice(NOUNS)}",
+        f"Your {random.choice(['hands','eyes','voice','skin','bones'])} like {random.choice(['a thief','a star','a flame','a ghost','a secret'])}",
+        f"We are {random.choice(['lost','found','breaking','falling','rising'])} in the {random.choice(ADJECTIVES)} {random.choice(['light','dark','air','water','fire'])}",
+        # PLACES entries already begin with "the", so no extra article is
+        # inserted here (it previously rendered as "in the the dark").
+        f"{random.choice(ADJECTIVES).capitalize()} {random.choice(NOUNS)} in {random.choice(PLACES)}",
+        f"All the {random.choice(['words','lies','dreams','scars','secrets'])} we kept inside",
+        f"Does anyone {random.choice(['hear','care','know','remember'])} the sound of {random.choice(['screams','laughter','rain','silence'])}",
+    ]
+    return random.choice(patterns)
+
+def make_scene(lyric):
+    """Build a random visual scene dict for the given lyric line.
+
+    The result has the keys ``mood``, ``colors`` (a list of 2-4 distinct
+    colour names), ``composition``, ``camera`` and ``description`` (a
+    sentence summarising the other fields and quoting the lyric).
+    """
+    # Draw order (mood, palette size, palette, composition, camera) is fixed
+    # so that seeded runs produce the same scenes.
+    scene = {"mood": random.choice(MOODS)}
+    scene["colors"] = random.sample(COLORS_POOL, random.randint(2, 4))
+    scene["composition"] = random.choice(COMPOSITIONS)
+    scene["camera"] = random.choice(CAMERAS)
+
+    palette = ", ".join(scene["colors"])
+    framing = scene["composition"].capitalize()
+    scene["description"] = (
+        f"A {scene['mood']} scene. Colors: {palette}. "
+        f"{framing} composition, camera {scene['camera']}. "
+        f"The lyric says: {lyric}"
+    )
+    return scene
+
+def generate_entry(song_title, artist, beat_idx):
+    """Create one dataset entry for a beat of a song.
+
+    Beats are numbered by ``beat_idx`` and each spans 30 seconds, so the
+    ``M:SS`` timestamp is the start offset ``(beat_idx - 1) * 30`` seconds.
+    A fresh lyric is generated first and its scene second.
+    """
+    lyric = make_lyric()
+    scene = make_scene(lyric)
+    offset_seconds = 30 * (beat_idx - 1)
+    minutes, seconds = divmod(offset_seconds, 60)
+    entry = dict(
+        song=song_title,
+        artist=artist,
+        beat=beat_idx,
+        timestamp=f"{minutes}:{seconds:02d}",
+        duration_seconds=30,
+        lyric_line=lyric,
+        scene=scene,
+    )
+    return entry
+
+def generate_batch(num_songs=200, beats_per_song=5):
+    """Return ``num_songs * beats_per_song`` entries, grouped song by song.
+
+    Songs and artists are named ``Song NNN`` / ``Artist NNN`` starting at
+    001; beats within a song are numbered from 1.
+    """
+    return [
+        generate_entry(f"Song {number:03d}", f"Artist {number:03d}", beat)
+        for number in range(1, num_songs + 1)
+        for beat in range(1, beats_per_song + 1)
+    ]
+
+def _require(condition, message):
+    """Raise AssertionError(message) unless ``condition`` is truthy."""
+    if not condition:
+        raise AssertionError(message)
+
+def validate(entries, expected_entries=1000, expected_songs=200):
+    """Check the batch size and the structure of every entry.
+
+    Checks are raised explicitly instead of using ``assert`` statements so
+    they still run when Python is started with ``-O``.
+
+    Args:
+        entries: list of entry dicts as produced by ``generate_entry``.
+        expected_entries: required number of entries; ``None`` skips the check.
+        expected_songs: required number of distinct ``song`` values; ``None``
+            skips the check.
+
+    Raises:
+        AssertionError: on the first failed check.
+    """
+    if expected_entries is not None:
+        _require(len(entries) == expected_entries,
+                 f"Expected {expected_entries} entries, got {len(entries)}")
+    # .get() keeps a missing "song" key from surfacing as a KeyError here; the
+    # per-entry check below reports it instead.
+    songs = {e.get("song") for e in entries}
+    if expected_songs is not None:
+        _require(len(songs) == expected_songs,
+                 f"Expected {expected_songs} distinct songs, got {len(songs)}")
+    for idx, e in enumerate(entries):
+        where = f"entry {idx}"
+        _require(e.get("song"), f"{where}: missing song")
+        _require(e.get("artist"), f"{where}: missing artist")
+        beat = e.get("beat")
+        _require(isinstance(beat, int) and beat >= 1,
+                 f"{where}: beat must be an int >= 1")
+        ts = e.get("timestamp")
+        # fullmatch, unlike match with a trailing "$", rejects a value that
+        # ends in a newline.
+        _require(isinstance(ts, str) and re.fullmatch(r'[0-9]+:[0-5][0-9]', ts),
+                 f"{where}: timestamp must look like M:SS")
+        _require(e.get("lyric_line"), f"{where}: missing lyric_line")
+        scene = e.get("scene")
+        _require(isinstance(scene, dict), f"{where}: scene must be an object")
+        for k in ("mood", "colors", "composition", "camera", "description"):
+            _require(k in scene, f"{where}: missing scene.{k}")
+        colors = scene["colors"]
+        _require(isinstance(colors, list) and len(colors) >= 1,
+                 f"{where}: scene.colors must be a non-empty list")
+        _require(len(scene["description"]) >= 10,
+                 f"{where}: scene.description is too short")
+    print(f"Validation passed: {len(entries)} entries, {len(songs)} songs")
+
+def write_jsonl(entries, path):
+    """Write ``entries`` to ``path`` as UTF-8 JSON Lines, one object per line.
+
+    Missing parent directories are created, an existing file is overwritten,
+    and non-ASCII characters are written unescaped.
+    """
+    target = Path(path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    with target.open("w", encoding="utf-8") as handle:
+        handle.writelines(
+            json.dumps(record, ensure_ascii=False) + "\n" for record in entries
+        )
+    print(f"Wrote {len(entries)} entries → {path}")
+
+def main():
+    """Generate the batch, validate it, and write both output copies.
+
+    The entries are written to ~/.hermes/training-data/scene-descriptions.jsonl
+    and to training-data/scene-descriptions.jsonl under the repository root
+    (the parent of this script's directory). If scripts/validate-scene-data.py
+    exists there, it is run against the repo copy.
+
+    Raises:
+        SystemExit: with status 1 when the external validator exits non-zero.
+    """
+    import subprocess
+    import sys
+
+    entries = generate_batch(200, 5)
+    validate(entries)
+
+    repo_root = Path(__file__).resolve().parent.parent
+    home_out = Path.home() / ".hermes" / "training-data" / "scene-descriptions.jsonl"
+    repo_out = repo_root / "training-data" / "scene-descriptions.jsonl"
+
+    write_jsonl(entries, home_out)
+    write_jsonl(entries, repo_out)
+
+    # Run external validator if available
+    validator = repo_root / "scripts" / "validate-scene-data.py"
+    if not validator.exists():
+        return
+
+    # Anchor the schema on the repo root, like every other path here, so the
+    # script does not depend on the caller's current working directory.
+    schema = repo_root / "training-data" / "schema.json"
+    result = subprocess.run(
+        # Reuse the interpreter running this script rather than whatever
+        # "python3" happens to resolve to on PATH.
+        [sys.executable or "python3", str(validator), "--schema", str(schema), str(repo_out)],
+        capture_output=True, text=True
+    )
+    print(result.stdout)
+    if result.returncode != 0:
+        print("VALIDATION ERRORS:", result.stderr)
+        raise SystemExit(1)
+    print("Schema validation ✅")
+
+if __name__ == "__main__":
+    main()