Some checks failed
Smoke Test / smoke (pull_request) Failing after 18s
Architecture Lint / Linter Tests (pull_request) Successful in 21s
Validate Config / YAML Lint (pull_request) Failing after 14s
Validate Config / JSON Validate (pull_request) Successful in 16s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 50s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 55s
Validate Config / Cron Syntax Check (pull_request) Successful in 12s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 10s
Validate Config / Playbook Schema Validation (pull_request) Successful in 27s
Validate Training Data / validate (pull_request) Successful in 24s
Architecture Lint / Lint Repository (pull_request) Failing after 16s
PR Checklist / pr-checklist (pull_request) Successful in 2m52s
Add batch generator script that produces 200 synthetic songs × 5 beats each (1000 entries). Output written to ~/.hermes/training-data/scene-descriptions.jsonl and training-data/scene-descriptions.jsonl. Script: scripts/generate-scene-descriptions-1k.py Data: training-data/scene-descriptions.jsonl (1000 valid entries) Closes #576
168 lines
7.1 KiB
Python
Executable File
168 lines
7.1 KiB
Python
Executable File
#!/usr/bin/env python3
|
||
"""
|
||
generate-scene-descriptions-1k.py — Training Factory: 1K Lyrics→Visual Scenes.
|
||
|
||
Issue: timmy-config #576
|
||
Output: ~/.hermes/training-data/scene-descriptions.jsonl
|
||
training-data/scene-descriptions.jsonl (repo copy)
|
||
|
||
Generates 200 synthetic songs × 5 beats each = 1000 scene description entries.
|
||
Each entry maps a lyric line to a visual scene with mood, colors, composition, camera.
|
||
"""
|
||
|
||
import json
|
||
import random
|
||
import re
|
||
from pathlib import Path
|
||
|
||
# Seed the module-level RNG once, at import time, with a fixed value so that
# every run draws the same sequence of random choices.
random.seed(42)  # Reproducible
|
||
|
||
# Mood vocabulary; make_scene() draws exactly one per scene.
# Item order feeds the seeded random draws, so append rather than reorder.
MOODS = [
    "hopeful", "melancholic", "euphoric", "anxious", "tender",
    "intimate", "nostalgic", "yearning", "peaceful", "tense",
    "bittersweet", "lonely", "defiant", "ecstatic", "despairing",
    "wistful", "furious", "serene", "claustrophobic", "liberating",
    "mysterious", "romantic", "guarded", "aching", "resigned",
    "obsessive", "playful", "haunting", "vulnerable", "rebellious",
    "dreamy", "gritty", "ethereal", "somber", "electric",
]
|
||
|
||
# Colour vocabulary shared by make_lyric() (one colour inside a lyric line)
# and make_scene() (a sample of 2-4 colours per scene).
# Item order feeds the seeded random draws, so append rather than reorder.
COLORS_POOL = [
    "midnight blue", "electric pink", "neon orange", "deep navy",
    "coral", "amber", "violet", "teal",
    "gold", "silver", "chrome", "ivory",
    "charcoal", "burnt sienna", "slate", "espresso",
    "cream", "rust", "lavender", "mint green",
    "sunset pink", "steel blue", "warm white", "shadow grey",
    "blood red", "forest green", "ocean blue", "sand beige",
    "popsicle orange", "ice blue", "porcelain", "frost",
    "radiator white", "subway silver", "tunnel black", "neon green",
    "pixel red", "screen blue",
]
|
||
|
||
# Composition / framing vocabulary; make_scene() draws exactly one per scene.
# "steadicam" is spelled the same way as in CAMERAS (it was previously
# misspelled "steadycam" here, which leaked the typo into generated data).
# Item order feeds the seeded random draws, so append rather than reorder.
COMPOSITIONS = [
    "wide shot", "medium close-up", "close-up", "extreme close-up", "two-shot",
    "low angle", "high angle", "overhead", "profile", "silhouette",
    "establishing shot", "intimate", "dynamic tracking", "static", "handheld",
    "steadicam", "dolly in", "dolly out", "pan right", "pan left",
    "tilt up", "tilt down", "zoom in", "zoom out", "crane up", "crane down",
    "rack focus", "follow focus", "slow push", "pull back", "fish-eye", "split frame",
]
|
||
|
||
# Camera-movement vocabulary; make_scene() draws exactly one per scene.
# Item order feeds the seeded random draws, so append rather than reorder.
CAMERAS = [
    "static", "slow pan", "handheld", "steadicam",
    "dolly zoom", "rack focus", "tracking", "crane",
    "tilt", "push in", "pull out", "whip pan",
    "glide", "float", "smooth follow", "shaky",
    "locked off", "swing", "arc", "orbit",
]
|
||
|
||
# Lyric fragments: word lists that make_lyric() slots into its templates.
# Item order feeds the seeded random draws, so append rather than reorder.
NOUNS = [
    "shadow", "light", "heart", "rain", "city", "night",
    "memory", "silence", "fire", "wind", "ocean", "sky",
    "street", "door", "window", "mirror", "ghost", "echo",
    "promise", "lie", "fear", "hope", "dream", "pain",
]
|
||
# Third-person verbs used by the "The <noun> <verb> in <place>" lyric template.
# Item order feeds the seeded random draws, so append rather than reorder.
VERBS = [
    "falls", "rises", "cracks", "burns", "whispers",
    "screams", "dances", "shivers", "breaks", "cries",
    "laughs", "holds", "lets go", "fights", "surrenders",
    "runs", "waits", "searches", "hides", "remembers",
]
|
||
# Locations used by the lyric templates. Every entry already carries its own
# article ("the ..."), so templates must not prepend another one.
# "the void" used to be listed twice, which silently doubled its sampling
# weight; each place now appears exactly once.
# NOTE: changing the list length shifts the seeded random stream, so the
# regenerated dataset differs from one produced with the duplicated entry.
PLACES = [
    "the dark", "the rain", "the street", "the room", "the sky", "the void",
    "the forest", "the ocean", "the desert", "the city", "the hallway",
]
|
||
# Adjectives used by the lyric templates in make_lyric().
# Item order feeds the seeded random draws, so append rather than reorder.
ADJECTIVES = [
    "cold", "warm", "bright", "dark",
    "silent", "loud", "empty", "heavy",
    "soft", "sharp", "bitter", "sweet",
    "lost", "found", "broken", "whole",
]
|
||
|
||
def make_lyric():
    """Return one randomly assembled lyric line.

    Seven candidate lines are rendered from fixed templates filled with words
    drawn from NOUNS, VERBS, PLACES, ADJECTIVES, COLORS_POOL and a few inline
    word lists; one of the seven is then picked at random and returned.

    Returns:
        str: the chosen lyric line.
    """
    # All seven templates are rendered eagerly, in this order, before one is
    # chosen. That spends a few extra draws per call, but the number and order
    # of draws define the seeded random stream, so keep them as they are.
    patterns = [
        f"The {random.choice(NOUNS)} {random.choice(VERBS)} in {random.choice(PLACES)}",
        f"I see the {random.choice(COLORS_POOL)} {random.choice(NOUNS)}",
        f"Your {random.choice(['hands','eyes','voice','skin','bones'])} like {random.choice(['a thief','a star','a flame','a ghost','a secret'])}",
        f"We are {random.choice(['lost','found','breaking','falling','rising'])} in the {random.choice(ADJECTIVES)} {random.choice(['light','dark','air','water','fire'])}",
        # PLACES entries already start with "the"; a literal "the" in this
        # template used to yield lines such as "Cold rain in the the dark".
        f"{random.choice(ADJECTIVES).capitalize()} {random.choice(NOUNS)} in {random.choice(PLACES)}",
        f"All the {random.choice(['words','lies','dreams','scars','secrets'])} we kept inside",
        f"Does anyone {random.choice(['hear','care','know','remember'])} the sound of {random.choice(['screams','laughter','rain','silence'])}",
    ]
    return random.choice(patterns)
|
||
|
||
def make_scene(lyric):
    """Build a random visual-scene record for one lyric line.

    Args:
        lyric: the lyric text; it is quoted verbatim at the end of the
            generated description.

    Returns:
        dict: keys "mood", "colors" (a list of 2-4 distinct colours),
        "composition", "camera" and "description" (a prose summary of the
        other four fields followed by the lyric).
    """
    # The draws happen in a fixed order (mood, palette size, palette,
    # composition, camera) because that order defines the seeded stream.
    feeling = random.choice(MOODS)
    palette = random.sample(COLORS_POOL, random.randint(2, 4))
    framing = random.choice(COMPOSITIONS)
    movement = random.choice(CAMERAS)

    summary = (
        f"A {feeling} scene. Colors: {', '.join(palette)}. "
        f"{framing.capitalize()} composition, camera {movement}. "
        f"The lyric says: {lyric}"
    )
    return dict(
        mood=feeling,
        colors=palette,
        composition=framing,
        camera=movement,
        description=summary,
    )
|
||
|
||
def generate_entry(song_title, artist, beat_idx):
    """Create one dataset entry for a single beat of a song.

    Args:
        song_title: value stored under "song".
        artist: value stored under "artist".
        beat_idx: 1-based beat number; beat N starts (N - 1) * 30 seconds in.

    Returns:
        dict: keys "song", "artist", "beat", "timestamp" (formatted "M:SS"),
        "duration_seconds" (always 30), "lyric_line" and "scene".
    """
    # The lyric must be drawn before the scene: the scene description embeds
    # it, and the call order fixes the seeded random stream.
    line = make_lyric()
    visuals = make_scene(line)

    # Offset of this beat from the start of the song, split into min / sec.
    minutes, seconds = divmod((beat_idx - 1) * 30, 60)

    return {
        "song": song_title,
        "artist": artist,
        "beat": beat_idx,
        "timestamp": f"{minutes}:{seconds:02d}",
        "duration_seconds": 30,
        "lyric_line": line,
        "scene": visuals,
    }
|
||
|
||
def generate_batch(num_songs=200, beats_per_song=5):
    """Generate entries for ``num_songs`` synthetic songs.

    Songs are named "Song 001", "Song 002", ... with matching "Artist NNN"
    names, and each contributes ``beats_per_song`` consecutive beats numbered
    from 1.

    Args:
        num_songs: number of songs to synthesise.
        beats_per_song: number of beats generated per song.

    Returns:
        list: entries ordered song by song, beat by beat.
    """
    batch = []
    for number in range(1, num_songs + 1):
        title = f"Song {number:03d}"
        performer = f"Artist {number:03d}"
        batch.extend(
            generate_entry(title, performer, beat)
            for beat in range(1, beats_per_song + 1)
        )
    return batch
|
||
|
||
def _require(condition, message):
    """Raise AssertionError(message) unless ``condition`` is truthy.

    A bare ``assert`` statement is removed when Python runs with ``-O``, which
    would silently turn validation into a no-op; raising explicitly keeps the
    same exception type while surviving optimised runs.
    """
    if not condition:
        raise AssertionError(message)


def validate(entries, expected_entries=1000, expected_songs=200):
    """Check that ``entries`` has the expected size and per-entry structure.

    Args:
        entries: sequence of entry dicts as produced by generate_entry().
        expected_entries: required total number of entries (default 1000).
        expected_songs: required number of distinct "song" values
            (default 200).

    Returns:
        None. Prints a one-line summary on success.

    Raises:
        AssertionError: if a count is wrong or any entry is malformed. The
            message names the offending entry index and field.
    """
    _require(
        len(entries) == expected_entries,
        f"Expected {expected_entries} entries, got {len(entries)}",
    )

    songs = set()
    for index, entry in enumerate(entries):
        where = f"entry {index}"
        _require(isinstance(entry, dict), f"{where}: not a dict")

        _require(entry.get("song"), f"{where}: missing song")
        _require(entry.get("artist"), f"{where}: missing artist")
        songs.add(entry["song"])

        beat = entry.get("beat")
        # bool is a subclass of int, so reject it explicitly.
        _require(
            isinstance(beat, int) and not isinstance(beat, bool) and beat >= 1,
            f"{where}: beat must be an integer >= 1, got {beat!r}",
        )

        timestamp = entry.get("timestamp")
        # fullmatch: re.match with a trailing "$" would also accept "0:30\n".
        _require(
            isinstance(timestamp, str)
            and re.fullmatch(r'[0-9]+:[0-5][0-9]', timestamp) is not None,
            f"{where}: timestamp must look like M:SS, got {timestamp!r}",
        )

        _require(entry.get("lyric_line"), f"{where}: missing lyric_line")

        scene = entry.get("scene", {})
        _require(isinstance(scene, dict), f"{where}: scene is not a dict")
        for k in ("mood", "colors", "composition", "camera", "description"):
            _require(k in scene, f"missing scene.{k}")
        _require(
            isinstance(scene["colors"], list) and len(scene["colors"]) >= 1,
            f"{where}: scene.colors must be a non-empty list",
        )
        _require(
            isinstance(scene["description"], str)
            and len(scene["description"]) >= 10,
            f"{where}: scene.description must be a string of >= 10 characters",
        )

    _require(
        len(songs) == expected_songs,
        f"Expected {expected_songs} distinct songs, got {len(songs)}",
    )
    print(f"Validation passed: {len(entries)} entries, {len(songs)} songs")
|
||
|
||
def write_jsonl(entries, path):
    """Write ``entries`` to ``path`` as JSON Lines, one object per line.

    Missing parent directories are created. An existing file is overwritten.
    Non-ASCII characters are written as-is in UTF-8 rather than escaped.

    Args:
        entries: sequence of JSON-serialisable objects.
        path: destination file (str or Path).
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)

    with destination.open("w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in entries
        )

    print(f"Wrote {len(entries)} entries → {path}")
|
||
|
||
def main():
    """Generate the dataset, validate it, and write both output copies.

    Steps:
      1. Generate 200 songs x 5 beats and run the in-process validate().
      2. Write the entries to ~/.hermes/training-data/scene-descriptions.jsonl
         and to <repo>/training-data/scene-descriptions.jsonl, where <repo>
         is the directory two levels above this file.
      3. If <repo>/scripts/validate-scene-data.py exists, run it against the
         repo copy with <repo>/training-data/schema.json.

    Raises:
        AssertionError: if the in-process validation fails.
        SystemExit: with status 1 if the external validator exits non-zero.
    """
    entries = generate_batch(200, 5)
    validate(entries)

    repo_root = Path(__file__).resolve().parent.parent
    home_out = Path.home() / ".hermes" / "training-data" / "scene-descriptions.jsonl"
    repo_out = repo_root / "training-data" / "scene-descriptions.jsonl"

    write_jsonl(entries, home_out)
    write_jsonl(entries, repo_out)

    # Run external validator if available
    validator = repo_root / "scripts" / "validate-scene-data.py"
    if validator.exists():
        import subprocess
        import sys

        # Anchor the schema to the repo root: a bare relative path would be
        # resolved against the caller's working directory and break whenever
        # the script is started from anywhere else.
        schema = repo_root / "training-data" / "schema.json"
        result = subprocess.run(
            # Reuse the interpreter running this script so the validator sees
            # the same environment; fall back to "python3" if it is unknown.
            [sys.executable or "python3", str(validator), "--schema", str(schema), str(repo_out)],
            capture_output=True, text=True
        )
        print(result.stdout)
        if result.returncode != 0:
            print("VALIDATION ERRORS:", result.stderr)
            raise SystemExit(1)
        print("Schema validation ✅")


if __name__ == "__main__":
    main()
|