# timmy-config/scripts/generate_prompt_enhancement.py

#!/usr/bin/env python3
"""
generate_prompt_enhancement.py — Generate 3000 terse→rich prompt pairs.

Domains covered (750 entries each):
  - visual scenes
  - music moods
  - dream descriptions
  - emotional weather

Output:
  ~/.hermes/training-data/prompt-enhancement.jsonl
"""

import argparse, json, os, random
from datetime import datetime, timezone

random.seed(575)

OUTPUT_PATH = os.path.expanduser("~/.hermes/training-data/prompt-enhancement.jsonl")

# --- Lexical pools drawn from existing data patterns ---
# Adjectives for the "visual scenes" domain; one is combined with a noun to
# form the terse prompt.
VISUAL_ADJS = [
    "whimsical", "melancholy", "pristine", "muted", "surreal",
    "epic", "serene", "stormy", "ethereal", "gritty",
    "vibrant", "haunting", "breathtaking", "ancient", "enchanted",
    "crystal", "frosty", "golden", "moonlit", "sun-drenched",
]

# Scene subjects for the "visual scenes" domain.
VISUAL_NOUNS = [
    "rain", "sunset", "fog", "beach", "mountain",
    "forest", "ocean", "cityscape", "meadow", "canyon",
    "desert", "aurora", "glacier", "river", "volcano",
    "swamp", "cliff", "prairie", "temple", "ruins",
]

# Detail phrases; generate_visual samples two or three of them without
# repetition for each rich prompt.
VISUAL_ELEMENTS = [
    "moonlit shadows",
    "frost clinging to every surface",
    "birds wheeling in formation",
    "lichen-covered stones",
    "wildflowers dotting the foreground",
    "mist rising from the valley",
    "leaves rustling in wind",
    "dust motes caught in a shaft of light",
    "reflections on still water",
    "silhouettes against the sky",
]

# Rendering-style phrases appended at the end of a rich visual prompt.
VISUAL_STYLES = [
    "painted in digital concept art", "cinematic lighting",
    "film noir style", "dreamy soft focus",
    "hyperrealistic detail", "painterly brushstrokes",
    "art nouveau linework", "pop art vibrancy",
]

# Emotion words for the "music moods" domain; each one is also a key of
# MUSIC_COLORS below.
MUSIC_EMOTIONS = [
    "aggression", "tenderness", "sorrow", "euphoria",
    "ominousness", "rage", "serenity", "longing",
    "melancholy", "triumph", "despair", "hope",
]

# Instrument / sound-source words used in the terse music prompt.
MUSIC_INSTR = [
    "riff", "piano", "violin", "cello", "trumpet",
    "harp", "saxophone", "drum", "flute", "guitar",
    "synth", "orchestra", "voice",
]

# Three-colour palette per music emotion.
MUSIC_COLORS = {
    "aggression":  ["red", "black", "orange"],
    "tenderness":  ["soft pink", "cream", "warm gold"],
    "sorrow":      ["deep blue", "grey", "silver"],
    "euphoria":    ["bright gold", "white", "cyan"],
    "ominousness": ["purple", "dark green", "blood red"],
    "rage":        ["crimson", "ember orange", "black"],
    "serenity":    ["pale blue", "seafoam green", "white"],
    "longing":     ["lavender", "dusty rose", "grey"],
    "melancholy":  ["slate grey", "faded blue", "ash"],
    "triumph":     ["royal blue", "gold", "white"],
    "despair":     ["mud brown", "charcoal", "dust"],
    "hope":        ["soft yellow", "warm white", "pale green"],
}

# Shot types inserted before the word "framing" in rich music prompts.
MUSIC_FRAMING = [
    "extreme close-up",
    "close-up",
    "medium shot",
    "wide shot",
    "long shot",
    "low angle",
    "high angle",
]

# Dream motifs; each one is used verbatim as the terse prompt of the
# "dream descriptions" domain.
DREAM_PHRASES = [
    "falling forever", "flying over ocean",
    "teeth crumbling", "melting face",
    "being chased by shadows", "teeth falling out",
    "naked in public", "floating upwards",
    "endless staircase", "lost wallet",
    "dead relatives speaking", "backwards childhood home",
    "trapped in a mirror", "growing wings",
    "underwater breathing", "time looping",
    "forgotten passwords", "public speaking naked",
    "infinite paperwork", "rooms that shift",
]

# Complete framing sentences (without the trailing period) appended to a
# rich dream prompt.
DREAM_FRAMINGS = [
    "rule of thirds framing with dolly forward movement",
    "close-up framing with tracking shot movement",
    "dutch angle framing with steady hold movement",
    "wide shot framing with rack focus movement",
    "low angle framing with pan movement",
    "high angle framing with tilt movement",
    "extreme close-up framing with zoom movement",
    "silhouette framing against backlight",
]

# Adjectives that open the rich prompt of the "emotional weather" domain.
EMOTION_ADJS = [
    "cold",
    "thick",
    "raw",
    "beautiful",
    "heavy",
    "sharp",
    "bitter",
    "warm",
]

# Emotion words; each one is also a key of EMOTION_COLORS below.
EMOTION_NOUNS = [
    "rage",
    "grief",
    "joy",
    "fear",
    "hope",
    "despair",
    "love",
    "loneliness",
]

# Three candidate colours per emotion.
EMOTION_COLORS = {
    "rage":       ["blood red", "black", "orange"],
    "grief":      ["charcoal", "deep blue", "ash"],
    "joy":        ["bright gold", "warm white", "orange"],
    "fear":       ["sickly green", "purple", "grey"],
    "hope":       ["soft pink", "pale yellow", "white"],
    "despair":    ["mud brown", "dark grey", "faded blue"],
    "love":       ["rose", "crimson", "gold"],
    "loneliness": ["pale blue", "white", "grey"],
}

# Shot types inserted before the word "framing" in rich emotional prompts.
EMOTION_FRAMING = [
    "wide shot", "close-up",
    "silhouette frame", "high angle",
    "low angle", "dutch angle",
    "bird's eye view", "subjective",
]

def generate_visual():
    """Build one terse/rich prompt pair for the "visual scenes" domain.

    The terse prompt is "<adjective> <noun>". The rich prompt expands it with
    a lighting condition, two or three detail phrases sampled without
    repetition from VISUAL_ELEMENTS, and a rendering style.

    Returns:
        dict: keys "terse", "rich" and "domain" (always "visual scenes").
    """
    adj = random.choice(VISUAL_ADJS)
    noun = random.choice(VISUAL_NOUNS)
    elements = random.sample(VISUAL_ELEMENTS, k=random.randint(2, 3))
    style = random.choice(VISUAL_STYLES)
    # Each option has to read naturally after "bathed in", so they are noun
    # phrases: "moonlight", not the adjective "moonlit".
    lighting = random.choice(
        ["moonlight", "golden hour", "storm light", "misty dawn", "midday glare", "twilight"]
    )
    # Several adjectives start with a vowel ("epic", "ancient", ...), so the
    # article cannot be hard-coded.
    article = "An" if adj[:1].lower() in {"a", "e", "i", "o", "u"} else "A"
    terse = f"{adj} {noun}"
    rich = f"{article} {adj} {noun} bathed in {lighting}, {', '.join(elements)}, {style}."
    return {"terse": terse, "rich": rich, "domain": "visual scenes"}

def generate_music():
    """Build one terse/rich prompt pair for the "music moods" domain.

    The terse prompt is "<emotion> <instrument>". The rich prompt names the
    emotion, lists the colour palette associated with that emotion in
    MUSIC_COLORS, and ends with a shot type.

    Returns:
        dict: keys "terse", "rich" and "domain" (always "music moods").
    """
    emotion = random.choice(MUSIC_EMOTIONS)
    instr = random.choice(MUSIC_INSTR)
    # Look the palette up by the chosen emotion. Drawing a random palette
    # from all values could pair e.g. "sorrow" with the colours of "rage".
    colors = MUSIC_COLORS[emotion]
    framing = random.choice(MUSIC_FRAMING)
    terse = f"{emotion} {instr}"
    # NOTE(review): the instrument only appears in the terse prompt; confirm
    # whether the rich prompt is meant to mention it as well.
    rich = (
        f"A bowing visualization of {emotion} through music. "
        f"Colors: {', '.join(colors)}. {framing} framing."
    )
    return {"terse": terse, "rich": rich, "domain": "music moods"}

def generate_dream():
    """Build one terse/rich prompt pair for the "dream descriptions" domain.

    The terse prompt is a dream motif from DREAM_PHRASES; the rich prompt
    quotes that motif and appends a framing sentence from DREAM_FRAMINGS.

    Returns:
        dict: keys "terse", "rich" and "domain" (always "dream descriptions").
    """
    motif = random.choice(DREAM_PHRASES)
    camera = random.choice(DREAM_FRAMINGS)
    return {
        "terse": motif,
        "rich": "A surreal dream visualization of '" + motif + "'. " + camera + ".",
        "domain": "dream descriptions",
    }

def generate_emotional():
    """Build one terse/rich prompt pair for the "emotional weather" domain.

    The terse prompt is "<emotion> <weather word>". The rich prompt opens with
    an adjective, names the emotion, gives one colour picked from that
    emotion's entry in EMOTION_COLORS, and ends with a shot type.

    Returns:
        dict: keys "terse", "rich" and "domain" (always "emotional weather").
    """
    emotion = random.choice(EMOTION_NOUNS)
    weather = random.choice(["storm", "rain", "sunrise", "fog", "thunder", "blizzard", "drought", "sky"])
    adj = random.choice(EMOTION_ADJS)
    # NOTE(review): a single colour is chosen although the label below says
    # "Colors:", and the weather word only appears in the terse prompt;
    # confirm whether either is intentional.
    colors = random.choice(EMOTION_COLORS[emotion])
    framing = random.choice(EMOTION_FRAMING)
    # Pick the article from the adjective; a hard-coded "An" produced
    # "An cold", "An thick", ... for every adjective in the pool.
    article = "An" if adj[:1].lower() in {"a", "e", "i", "o", "u"} else "A"
    terse = f"{emotion} {weather}"
    rich = f"{article} {adj} visualization of {emotion}. Colors: {colors}. {framing} framing."
    return {"terse": terse, "rich": rich, "domain": "emotional weather"}

def main(argv=None):
    """Generate the prompt-enhancement pairs and write them as JSON Lines.

    Command-line options:
        --seed     integer seed for the ``random`` module (default 575)
        --dry-run  generate and count the pairs but write nothing
        --output   destination file (default OUTPUT_PATH)

    Args:
        argv: Optional list of argument strings. When ``None``, argparse
            reads the process command line.

    Returns:
        int: 0 on success, suitable as a process exit status.

    Raises:
        AssertionError: if the number of generated pairs differs from the
            number requested.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=575)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--output", default=OUTPUT_PATH)
    args = parser.parse_args(argv)

    # Domain -> (generator, number of entries). Dict order is output order.
    plan = {
        "visual scenes": (generate_visual, 750),
        "music moods": (generate_music, 750),
        "dream descriptions": (generate_dream, 750),
        "emotional weather": (generate_emotional, 750),
    }
    total = sum(count for _, count in plan.values())

    random.seed(args.seed)
    print(f"Generating {total} prompt-enhancement pairs (seed={args.seed})")

    pairs = []
    for domain, (generate, count) in plan.items():
        slug = domain.replace(" ", "-")
        domain_pairs = []
        for index in range(1, count + 1):
            pair = generate()
            pair["id"] = f"{slug}-{index:04d}"
            pair["model"] = "generator-script"
            pair["timestamp"] = datetime.now(timezone.utc).isoformat()
            domain_pairs.append(pair)
        pairs.extend(domain_pairs)
        print(f"  {domain}: {len(domain_pairs)} entries")

    # Explicit raise instead of ``assert`` so the check survives ``python -O``.
    if len(pairs) != total:
        raise AssertionError(f"Expected {total}, got {len(pairs)}")

    if args.dry_run:
        print("\nDry run — no output written.")
        return 0

    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    out_dir = os.path.dirname(args.output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # ensure_ascii=False can emit non-ASCII text, so do not rely on the
    # platform's default encoding.
    with open(args.output, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(p, ensure_ascii=False) + "\n" for p in pairs)

    print(f"\nTotal: {len(pairs)} pairs")
    print(f"Output: {args.output}")
    return 0

# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())