feat(training): generate 1K lyrics→visual scene descriptions (#576)
Some checks failed
Smoke Test / smoke (pull_request) Failing after 18s
Architecture Lint / Linter Tests (pull_request) Successful in 21s
Validate Config / YAML Lint (pull_request) Failing after 14s
Validate Config / JSON Validate (pull_request) Successful in 16s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 50s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 55s
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 27s
Validate Training Data / validate (pull_request) Successful in 24s
Architecture Lint / Lint Repository (pull_request) Failing after 16s
PR Checklist / pr-checklist (pull_request) Successful in 2m52s

Add batch generator script that produces 200 synthetic songs × 5 beats
each (1000 entries). Output written to ~/.hermes/training-data/scene-descriptions.jsonl
and training-data/scene-descriptions.jsonl.

Script: scripts/generate-scene-descriptions-1k.py
Data: training-data/scene-descriptions.jsonl (1000 valid entries)
Closes #576
This commit is contained in:
Alexander Payne
2026-04-26 03:32:36 -04:00
parent 52510e5ab3
commit 82ea9da121
2 changed files with 1167 additions and 0 deletions

View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""
generate-scene-descriptions-1k.py — Training Factory: 1K Lyrics→Visual Scenes.
Issue: timmy-config #576
Output: ~/.hermes/training-data/scene-descriptions.jsonl
training-data/scene-descriptions.jsonl (repo copy)
Generates 200 synthetic songs × 5 beats each = 1000 scene description entries.
Each entry maps a lyric line to a visual scene with mood, colors, composition, camera.
"""
import json
import random
import re
from pathlib import Path
# Seed the module-level RNG once at import time so repeated runs of the
# script emit the same dataset.
random.seed(42)  # Reproducible

# Emotional tone attached to each scene.
MOODS = [
    "hopeful", "melancholic", "euphoric", "anxious", "tender", "intimate",
    "nostalgic", "yearning", "peaceful", "tense", "bittersweet", "lonely",
    "defiant", "ecstatic", "despairing", "wistful", "furious", "serene",
    "claustrophobic", "liberating", "mysterious", "romantic", "guarded",
    "aching", "resigned", "obsessive", "playful", "haunting", "vulnerable",
    "rebellious", "dreamy", "gritty", "ethereal", "somber", "electric",
]

# Color names; used both for scene palettes and inside one lyric template.
COLORS_POOL = [
    "midnight blue", "electric pink", "neon orange", "deep navy", "coral",
    "amber", "violet", "teal", "gold", "silver", "chrome", "ivory",
    "charcoal", "burnt sienna", "slate", "espresso", "cream", "rust",
    "lavender", "mint green", "sunset pink", "steel blue", "warm white",
    "shadow grey", "blood red", "forest green", "ocean blue", "sand beige",
    "popsicle orange", "ice blue", "porcelain", "frost", "radiator white",
    "subway silver", "tunnel black", "neon green", "pixel red", "screen blue",
]

# Shot framing / composition labels.
# "steadicam" is spelled the same way as in CAMERAS (was "steadycam").
COMPOSITIONS = [
    "wide shot", "medium close-up", "close-up", "extreme close-up", "two-shot",
    "low angle", "high angle", "overhead", "profile", "silhouette",
    "establishing shot", "intimate", "dynamic tracking", "static", "handheld",
    "steadicam", "dolly in", "dolly out", "pan right", "pan left",
    "tilt up", "tilt down", "zoom in", "zoom out", "crane up", "crane down",
    "rack focus", "follow focus", "slow push", "pull back", "fish-eye", "split frame",
]

# Camera movement labels.
CAMERAS = [
    "static", "slow pan", "handheld", "steadicam", "dolly zoom",
    "rack focus", "tracking", "crane", "tilt", "push in",
    "pull out", "whip pan", "glide", "float", "smooth follow",
    "shaky", "locked off", "swing", "arc", "orbit",
]

# Lyric fragments
NOUNS = [
    "shadow", "light", "heart", "rain", "city", "night", "memory", "silence",
    "fire", "wind", "ocean", "sky", "street", "door", "window", "mirror",
    "ghost", "echo", "promise", "lie", "fear", "hope", "dream", "pain",
]
VERBS = [
    "falls", "rises", "cracks", "burns", "whispers", "screams", "dances",
    "shivers", "breaks", "cries", "laughs", "holds", "lets go", "fights",
    "surrenders", "runs", "waits", "searches", "hides", "remembers",
]
# Every entry carries its own leading article ("the ...").
# "the void" used to be listed twice, which doubled its sampling weight.
PLACES = [
    "the dark", "the rain", "the street", "the room", "the sky", "the void",
    "the forest", "the ocean", "the desert", "the city", "the hallway",
]
ADJECTIVES = [
    "cold", "warm", "bright", "dark", "silent", "loud", "empty", "heavy",
    "soft", "sharp", "bitter", "sweet", "lost", "found", "broken", "whole",
]
def make_lyric():
    """Return one randomly assembled lyric line.

    All seven templates are rendered on every call and one of them is then
    picked at random, so each call issues the same sequence of
    ``random.choice`` calls regardless of which template is returned.

    Returns:
        str: A single lyric line.
    """
    patterns = [
        f"The {random.choice(NOUNS)} {random.choice(VERBS)} in {random.choice(PLACES)}",
        f"I see the {random.choice(COLORS_POOL)} {random.choice(NOUNS)}",
        f"Your {random.choice(['hands','eyes','voice','skin','bones'])} like {random.choice(['a thief','a star','a flame','a ghost','a secret'])}",
        f"We are {random.choice(['lost','found','breaking','falling','rising'])} in the {random.choice(ADJECTIVES)} {random.choice(['light','dark','air','water','fire'])}",
        # PLACES entries already begin with "the"; the extra article that used
        # to sit here produced lines such as "Cold shadow in the the dark".
        f"{random.choice(ADJECTIVES).capitalize()} {random.choice(NOUNS)} in {random.choice(PLACES)}",
        f"All the {random.choice(['words','lies','dreams','scars','secrets'])} we kept inside",
        f"Does anyone {random.choice(['hear','care','know','remember'])} the sound of {random.choice(['screams','laughter','rain','silence'])}",
    ]
    return random.choice(patterns)
def make_scene(lyric):
    """Assemble a randomized visual scene for one lyric line.

    Draws a mood, a palette of two to four distinct colors, a composition
    and a camera movement (in that order), then folds them together with the
    lyric into a one-paragraph description.

    Args:
        lyric: The lyric line quoted at the end of the description.

    Returns:
        dict: Keys ``mood``, ``colors``, ``composition``, ``camera`` and
        ``description``.
    """
    chosen_mood = random.choice(MOODS)
    # The palette size is drawn first, then that many distinct colors.
    palette = random.sample(COLORS_POOL, random.randint(2, 4))
    framing = random.choice(COMPOSITIONS)
    movement = random.choice(CAMERAS)

    summary = (
        f"A {chosen_mood} scene. "
        f"Colors: {', '.join(palette)}. "
        f"{framing.capitalize()} composition, camera {movement}. "
        f"The lyric says: {lyric}"
    )
    return dict(
        mood=chosen_mood,
        colors=palette,
        composition=framing,
        camera=movement,
        description=summary,
    )
def generate_entry(song_title, artist, beat_idx):
    """Create the dataset entry for one beat of a song.

    A lyric is generated first and a scene is then built for it. The
    timestamp is the beat's start offset, with beats spaced 30 seconds apart
    and beat 1 starting at ``0:00``.

    Args:
        song_title: Title stored under ``song``.
        artist: Name stored under ``artist``.
        beat_idx: 1-based beat number within the song.

    Returns:
        dict: Entry with ``song``, ``artist``, ``beat``, ``timestamp``,
        ``duration_seconds``, ``lyric_line`` and ``scene``.
    """
    lyric_text = make_lyric()
    scene_info = make_scene(lyric_text)

    # Start offset in seconds, split into minutes and leftover seconds.
    minutes, seconds = divmod((beat_idx - 1) * 30, 60)

    entry = {
        "song": song_title,
        "artist": artist,
        "beat": beat_idx,
        "timestamp": f"{minutes}:{seconds:02d}",
        "duration_seconds": 30,
        "lyric_line": lyric_text,
        "scene": scene_info,
    }
    return entry
def generate_batch(num_songs=200, beats_per_song=5):
    """Generate entries for a run of synthetic songs.

    Songs are numbered from 1 and named ``Song NNN`` / ``Artist NNN``; each
    contributes ``beats_per_song`` entries with beat indices starting at 1.
    Entries are produced song by song, beat by beat.

    Args:
        num_songs: How many songs to generate.
        beats_per_song: How many beats to generate per song.

    Returns:
        list: The generated entries, in generation order.
    """
    batch = []
    for song_no in range(1, num_songs + 1):
        title, performer = f"Song {song_no:03d}", f"Artist {song_no:03d}"
        batch.extend(
            generate_entry(title, performer, beat)
            for beat in range(1, beats_per_song + 1)
        )
    return batch
# Minutes (one or more digits), a colon, then two-digit seconds 00-59.
# Used with fullmatch() so a trailing newline is rejected.
_TIMESTAMP_RE = re.compile(r"[0-9]+:[0-5][0-9]")


def _require(condition, message):
    """Raise ``AssertionError(message)`` unless *condition* is truthy."""
    if not condition:
        raise AssertionError(message)


def validate(entries, expected_entries=1000, expected_songs=200):
    """Check that a generated batch has the expected size and entry shape.

    Failures are raised explicitly as ``AssertionError`` instead of through
    ``assert`` statements, so the checks still run under ``python -O`` while
    callers keep seeing the same exception type.

    Args:
        entries: Sequence of entry dicts to check.
        expected_entries: Required number of entries.
        expected_songs: Required number of distinct ``song`` values.

    Raises:
        AssertionError: If the batch size, the number of distinct songs, or
            any individual entry does not meet the requirements.
    """
    _require(
        len(entries) == expected_entries,
        f"Expected {expected_entries} entries, got {len(entries)}",
    )

    songs = set()
    for idx, entry in enumerate(entries):
        where = f"entry {idx}"
        _require(entry.get("song"), f"{where}: missing song")
        songs.add(entry["song"])
        _require(entry.get("artist"), f"{where}: missing artist")

        beat = entry.get("beat")
        _require(
            isinstance(beat, int) and beat >= 1,
            f"{where}: beat must be an int >= 1, got {beat!r}",
        )

        timestamp = entry.get("timestamp")
        _require(
            isinstance(timestamp, str) and _TIMESTAMP_RE.fullmatch(timestamp),
            f"{where}: bad timestamp {timestamp!r}",
        )
        _require(entry.get("lyric_line"), f"{where}: missing lyric_line")

        scene = entry.get("scene", {})
        _require(isinstance(scene, dict), f"{where}: scene must be a dict")
        for key in ("mood", "colors", "composition", "camera", "description"):
            _require(key in scene, f"missing scene.{key}")

        colors = scene["colors"]
        _require(
            isinstance(colors, list) and len(colors) >= 1,
            f"{where}: scene.colors must be a non-empty list",
        )
        description = scene["description"]
        _require(
            isinstance(description, str) and len(description) >= 10,
            f"{where}: scene.description must be a string of at least 10 characters",
        )

    # Checked after the per-entry pass so a missing song is reported as such
    # rather than as a wrong song count.
    _require(
        len(songs) == expected_songs,
        f"Expected {expected_songs} distinct songs, got {len(songs)}",
    )
    print(f"Validation passed: {len(entries)} entries, {len(songs)} songs")
def write_jsonl(entries, path):
    """Write *entries* to *path* as JSON Lines, one object per line.

    Missing parent directories are created. The file is written as UTF-8
    with non-ASCII characters left unescaped, and any existing file at
    *path* is overwritten. A one-line summary is printed afterwards.

    Args:
        entries: Sized iterable of JSON-serializable objects.
        path: Destination file path.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in entries
        )
    print(f"Wrote {len(entries)} entries → {path}")
def main():
    """Generate the dataset, validate it, and write both output copies.

    Generates 200 songs x 5 beats, runs the in-process ``validate`` checks,
    then writes the entries to ``~/.hermes/training-data`` and to the
    repository's ``training-data`` directory. If
    ``scripts/validate-scene-data.py`` exists in the repository, it is run
    against the repo copy as a final schema check.

    Raises:
        SystemExit: With status 1 when the external validator exits non-zero.
    """
    # Only main() needs these, so they are imported locally.
    import subprocess
    import sys

    entries = generate_batch(200, 5)
    validate(entries)

    # This script sits one directory below the repository root.
    repo_root = Path(__file__).resolve().parent.parent
    home_out = Path.home() / ".hermes" / "training-data" / "scene-descriptions.jsonl"
    repo_out = repo_root / "training-data" / "scene-descriptions.jsonl"
    write_jsonl(entries, home_out)
    write_jsonl(entries, repo_out)

    # Run external validator if available
    validator = repo_root / "scripts" / "validate-scene-data.py"
    if validator.exists():
        # Anchor the schema at the repo root like every other path here; the
        # bare relative path only resolved when the script was launched from
        # the repository root.
        schema = repo_root / "training-data" / "schema.json"
        result = subprocess.run(
            # sys.executable runs the validator with the same interpreter as
            # this script instead of whatever "python3" is on PATH.
            [sys.executable, str(validator), "--schema", str(schema), str(repo_out)],
            capture_output=True, text=True,
        )
        print(result.stdout)
        if result.returncode != 0:
            print("VALIDATION ERRORS:", result.stderr)
            raise SystemExit(1)
        print("Schema validation ✅")


if __name__ == "__main__":
    main()