#!/usr/bin/env python3
"""
generate_prompt_enhancement.py — Generate terse→rich prompt pairs.

Domains covered (750 entries each by default, 3000 in total):
  - visual scenes
  - music moods
  - dream descriptions
  - emotional weather

Output:
  ~/.hermes/training-data/prompt-enhancement.jsonl

Each output line is a JSON object with the keys ``terse``, ``rich``,
``domain``, ``id``, ``model`` and ``timestamp``.
"""

import argparse
import json
import os
import random
from datetime import datetime, timezone

DEFAULT_SEED = 575
DEFAULT_PER_DOMAIN = 750

# Seed the shared RNG so that calling the generate_* functions straight after
# import is reproducible; main() re-seeds from --seed.
random.seed(DEFAULT_SEED)

OUTPUT_PATH = os.path.expanduser("~/.hermes/training-data/prompt-enhancement.jsonl")

# --- Lexical pools drawn from existing data patterns ---
VISUAL_ADJS = [
    "whimsical", "melancholy", "pristine", "muted", "surreal", "epic", "serene",
    "stormy", "ethereal", "gritty", "vibrant", "haunting", "breathtaking",
    "ancient", "enchanted", "crystal", "frosty", "golden", "moonlit", "sun-drenched",
]
VISUAL_NOUNS = [
    "rain", "sunset", "fog", "beach", "mountain", "forest", "ocean",
    "cityscape", "meadow", "canyon", "desert", "aurora", "glacier",
    "river", "volcano", "swamp", "cliff", "prairie", "temple", "ruins",
]
VISUAL_ELEMENTS = [
    "moonlit shadows", "frost clinging to every surface", "birds wheeling in formation",
    "lichen-covered stones", "wildflowers dotting the foreground", "mist rising from the valley",
    "leaves rustling in wind", "dust motes caught in a shaft of light",
    "reflections on still water", "silhouettes against the sky",
]
VISUAL_STYLES = [
    "painted in digital concept art",
    "cinematic lighting",
    "film noir style",
    "dreamy soft focus",
    "hyperrealistic detail",
    "painterly brushstrokes",
    "art nouveau linework",
    "pop art vibrancy",
]
# Every entry must read naturally after "bathed in ...".
VISUAL_LIGHTING = [
    "moonlight", "golden hour", "storm light", "misty dawn", "midday glare", "twilight",
]

MUSIC_EMOTIONS = [
    "aggression", "tenderness", "sorrow", "euphoria", "ominousness", "rage",
    "serenity", "longing", "melancholy", "triumph", "despair", "hope",
]
MUSIC_INSTR = [
    "riff", "piano", "violin", "cello", "trumpet", "harp", "saxophone",
    "drum", "flute", "guitar", "synth", "orchestra", "voice",
]
# Palette per emotion; keys must cover every entry of MUSIC_EMOTIONS.
MUSIC_COLORS = {
    "aggression": ["red", "black", "orange"],
    "tenderness": ["soft pink", "cream", "warm gold"],
    "sorrow": ["deep blue", "grey", "silver"],
    "euphoria": ["bright gold", "white", "cyan"],
    "ominousness": ["purple", "dark green", "blood red"],
    "rage": ["crimson", "ember orange", "black"],
    "serenity": ["pale blue", "seafoam green", "white"],
    "longing": ["lavender", "dusty rose", "grey"],
    "melancholy": ["slate grey", "faded blue", "ash"],
    "triumph": ["royal blue", "gold", "white"],
    "despair": ["mud brown", "charcoal", "dust"],
    "hope": ["soft yellow", "warm white", "pale green"],
}
MUSIC_FRAMING = [
    "extreme close-up", "close-up", "medium shot", "wide shot", "long shot", "low angle", "high angle",
]

DREAM_PHRASES = [
    "falling forever", "flying over ocean", "teeth crumbling", "melting face",
    "being chased by shadows", "teeth falling out", "naked in public", "floating upwards",
    "endless staircase", "lost wallet", "dead relatives speaking", "backwards childhood home",
    "trapped in a mirror", "growing wings", "underwater breathing", "time looping",
    "forgotten passwords", "public speaking naked", "infinite paperwork", "rooms that shift",
]
DREAM_FRAMINGS = [
    "rule of thirds framing with dolly forward movement",
    "close-up framing with tracking shot movement",
    "dutch angle framing with steady hold movement",
    "wide shot framing with rack focus movement",
    "low angle framing with pan movement",
    "high angle framing with tilt movement",
    "extreme close-up framing with zoom movement",
    "silhouette framing against backlight",
]

EMOTION_ADJS = [
    "cold", "thick", "raw", "beautiful", "heavy", "sharp", "bitter", "warm",
]
EMOTION_NOUNS = [
    "rage", "grief", "joy", "fear", "hope", "despair", "love", "loneliness",
]
EMOTION_WEATHER = [
    "storm", "rain", "sunrise", "fog", "thunder", "blizzard", "drought", "sky",
]
# Palette per emotion; keys must cover every entry of EMOTION_NOUNS.
EMOTION_COLORS = {
    "rage": ["blood red", "black", "orange"],
    "grief": ["charcoal", "deep blue", "ash"],
    "joy": ["bright gold", "warm white", "orange"],
    "fear": ["sickly green", "purple", "grey"],
    "hope": ["soft pink", "pale yellow", "white"],
    "despair": ["mud brown", "dark grey", "faded blue"],
    "love": ["rose", "crimson", "gold"],
    "loneliness": ["pale blue", "white", "grey"],
}
EMOTION_FRAMING = [
    "wide shot", "close-up", "silhouette frame", "high angle",
    "low angle", "dutch angle", "bird's eye view", "subjective",
]

_VOWELS = frozenset("aeiou")


def _article(word):
    """Return "an" if *word* starts with a vowel letter, otherwise "a"."""
    return "an" if word[:1].lower() in _VOWELS else "a"


def _sentence_case(text):
    """Upper-case only the first character, leaving the rest untouched."""
    return text[:1].upper() + text[1:]


def generate_visual():
    """Build one "visual scenes" pair.

    Returns:
        dict with ``terse`` ("<adjective> <noun>"), ``rich`` (the same scene
        expanded with lighting, two or three scene elements and a style) and
        ``domain``.
    """
    adj = random.choice(VISUAL_ADJS)
    noun = random.choice(VISUAL_NOUNS)
    elements = random.sample(VISUAL_ELEMENTS, k=random.randint(2, 3))
    style = random.choice(VISUAL_STYLES)
    lighting = random.choice(VISUAL_LIGHTING)
    terse = f"{adj} {noun}"
    # The article depends on the adjective ("An epic ...", "A serene ...").
    rich = (
        f"{_article(adj).capitalize()} {adj} {noun} bathed in {lighting}, "
        f"{', '.join(elements)}, {style}."
    )
    return {"terse": terse, "rich": rich, "domain": "visual scenes"}


def generate_music():
    """Build one "music moods" pair.

    Returns:
        dict with ``terse`` ("<emotion> <instrument>"), ``rich`` (naming the
        emotion, the instrument, that emotion's palette from MUSIC_COLORS and
        a camera framing) and ``domain``.
    """
    emotion = random.choice(MUSIC_EMOTIONS)
    instr = random.choice(MUSIC_INSTR)
    # Use the palette that belongs to the chosen emotion rather than a random
    # one, otherwise the colors contradict the mood being described.
    colors = MUSIC_COLORS[emotion]
    framing = random.choice(MUSIC_FRAMING)
    terse = f"{emotion} {instr}"
    rich = (
        f"A visualization of {emotion} through music, led by {_article(instr)} {instr}. "
        f"Colors: {', '.join(colors)}. {_sentence_case(framing)} framing."
    )
    return {"terse": terse, "rich": rich, "domain": "music moods"}


def generate_dream():
    """Build one "dream descriptions" pair.

    Returns:
        dict with ``terse`` (a dream phrase), ``rich`` (the phrase plus a
        framing/movement sentence) and ``domain``.
    """
    phrase = random.choice(DREAM_PHRASES)
    framing = random.choice(DREAM_FRAMINGS)
    terse = phrase
    rich = f"A surreal dream visualization of '{phrase}'. {_sentence_case(framing)}."
    return {"terse": terse, "rich": rich, "domain": "dream descriptions"}


def generate_emotional():
    """Build one "emotional weather" pair.

    Returns:
        dict with ``terse`` ("<emotion> <weather>"), ``rich`` (naming the
        emotion, the weather, that emotion's palette from EMOTION_COLORS and
        a camera framing) and ``domain``.
    """
    emotion = random.choice(EMOTION_NOUNS)
    weather = random.choice(EMOTION_WEATHER)
    adj = random.choice(EMOTION_ADJS)
    colors = EMOTION_COLORS[emotion]
    framing = random.choice(EMOTION_FRAMING)
    terse = f"{emotion} {weather}"
    rich = (
        f"{_article(adj).capitalize()} {adj} visualization of {emotion} rendered as {weather}. "
        f"Colors: {', '.join(colors)}. {_sentence_case(framing)} framing."
    )
    return {"terse": terse, "rich": rich, "domain": "emotional weather"}


# Domain name -> generator, in output order.
_GENERATORS = {
    "visual scenes": generate_visual,
    "music moods": generate_music,
    "dream descriptions": generate_dream,
    "emotional weather": generate_emotional,
}


def main(argv=None):
    """Generate the dataset and write it as JSON Lines.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv``.

    Returns:
        0 on success, suitable for passing to ``SystemExit``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=DEFAULT_SEED)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--output", default=OUTPUT_PATH)
    parser.add_argument(
        "--per-domain",
        type=int,
        default=DEFAULT_PER_DOMAIN,
        help="number of pairs to generate for each domain",
    )
    args = parser.parse_args(argv)
    if args.per_domain < 0:
        parser.error("--per-domain must be non-negative")

    random.seed(args.seed)
    total = args.per_domain * len(_GENERATORS)
    print(f"Generating {total} prompt-enhancement pairs (seed={args.seed})")

    # One timestamp per run: per-record clock reads would make two runs with
    # the same seed differ on every line.
    timestamp = datetime.now(timezone.utc).isoformat()

    pairs = []
    for domain, generate in _GENERATORS.items():
        slug = domain.replace(" ", "-")
        domain_pairs = []
        for index in range(1, args.per_domain + 1):
            pair = generate()
            pair["id"] = f"{slug}-{index:04d}"
            pair["model"] = "generator-script"
            pair["timestamp"] = timestamp
            domain_pairs.append(pair)
        pairs.extend(domain_pairs)
        # The pools are finite (e.g. 20 dream phrases x 8 framings), so large
        # counts necessarily repeat; report how many pairs are distinct.
        unique = len({(p["terse"], p["rich"]) for p in domain_pairs})
        print(f"  {domain}: {len(domain_pairs)} entries ({unique} unique)")

    if args.dry_run:
        print("\nDry run — no output written.")
        return 0

    output_path = os.path.expanduser(args.output)
    # dirname is "" for a bare filename, and os.makedirs("") raises.
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    # ensure_ascii=False may emit non-ASCII text, so pin the file encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(p, ensure_ascii=False) + "\n" for p in pairs)

    print(f"\nTotal: {len(pairs)} pairs")
    print(f"Output: {output_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())