203 lines
7.7 KiB
Python
203 lines
7.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
generate_prompt_enhancement.py — Generate 3000 terse→rich prompt pairs.
|
|
|
|
Domains covered (750 entries each):
|
|
- visual scenes
|
|
- music moods
|
|
- dream descriptions
|
|
- emotional weather
|
|
|
|
Output:
|
|
~/.hermes/training-data/prompt-enhancement.jsonl
|
|
"""
|
|
|
|
import argparse, json, os, random
|
|
from datetime import datetime, timezone
|
|
|
|
# Seed the shared RNG as soon as the module is loaded; main() seeds it again
# from the --seed option before generating anything.
random.seed(575)

# Default JSONL destination, with "~" resolved to the current user's home.
OUTPUT_PATH = os.path.expanduser(
    "~/.hermes/training-data/prompt-enhancement.jsonl"
)
|
|
|
|
# --- Lexical pools drawn from existing data patterns ---

# Adjective half of a visual-scene terse prompt ("<adj> <noun>").
VISUAL_ADJS = [
    "whimsical", "melancholy", "pristine", "muted", "surreal",
    "epic", "serene", "stormy", "ethereal", "gritty",
    "vibrant", "haunting", "breathtaking", "ancient", "enchanted",
    "crystal", "frosty", "golden", "moonlit", "sun-drenched",
]

# Noun half of a visual-scene terse prompt.
VISUAL_NOUNS = [
    "rain", "sunset", "fog", "beach", "mountain",
    "forest", "ocean", "cityscape", "meadow", "canyon",
    "desert", "aurora", "glacier", "river", "volcano",
    "swamp", "cliff", "prairie", "temple", "ruins",
]

# Detail phrases; generate_visual() samples two or three of these per entry.
VISUAL_ELEMENTS = [
    "moonlit shadows",
    "frost clinging to every surface",
    "birds wheeling in formation",
    "lichen-covered stones",
    "wildflowers dotting the foreground",
    "mist rising from the valley",
    "leaves rustling in wind",
    "dust motes caught in a shaft of light",
    "reflections on still water",
    "silhouettes against the sky",
]

# Rendering-style phrase that closes the rich visual prompt.
VISUAL_STYLES = [
    "painted in digital concept art",
    "cinematic lighting",
    "film noir style",
    "dreamy soft focus",
    "hyperrealistic detail",
    "painterly brushstrokes",
    "art nouveau linework",
    "pop art vibrancy",
]
|
|
|
|
# Emotion word that leads a music-mood terse prompt ("<emotion> <instrument>").
MUSIC_EMOTIONS = [
    "aggression", "tenderness", "sorrow", "euphoria",
    "ominousness", "rage", "serenity", "longing",
    "melancholy", "triumph", "despair", "hope",
]

# Instrument word that ends a music-mood terse prompt.
MUSIC_INSTR = [
    "riff", "piano", "violin", "cello", "trumpet",
    "harp", "saxophone", "drum", "flute", "guitar",
    "synth", "orchestra", "voice",
]

# Three-colour palette per emotion; the keys mirror MUSIC_EMOTIONS.
MUSIC_COLORS = {
    "aggression":  ["red", "black", "orange"],
    "tenderness":  ["soft pink", "cream", "warm gold"],
    "sorrow":      ["deep blue", "grey", "silver"],
    "euphoria":    ["bright gold", "white", "cyan"],
    "ominousness": ["purple", "dark green", "blood red"],
    "rage":        ["crimson", "ember orange", "black"],
    "serenity":    ["pale blue", "seafoam green", "white"],
    "longing":     ["lavender", "dusty rose", "grey"],
    "melancholy":  ["slate grey", "faded blue", "ash"],
    "triumph":     ["royal blue", "gold", "white"],
    "despair":     ["mud brown", "charcoal", "dust"],
    "hope":        ["soft yellow", "warm white", "pale green"],
}

# Camera framings appended to the rich music prompt.
MUSIC_FRAMING = [
    "extreme close-up",
    "close-up",
    "medium shot",
    "wide shot",
    "long shot",
    "low angle",
    "high angle",
]
|
|
|
|
# Terse dream prompts; each one is quoted verbatim inside the rich prompt.
DREAM_PHRASES = [
    "falling forever",
    "flying over ocean",
    "teeth crumbling",
    "melting face",
    "being chased by shadows",
    "teeth falling out",
    "naked in public",
    "floating upwards",
    "endless staircase",
    "lost wallet",
    "dead relatives speaking",
    "backwards childhood home",
    "trapped in a mirror",
    "growing wings",
    "underwater breathing",
    "time looping",
    "forgotten passwords",
    "public speaking naked",
    "infinite paperwork",
    "rooms that shift",
]

# Framing / camera-movement sentence appended after the quoted dream phrase.
DREAM_FRAMINGS = [
    "rule of thirds framing with dolly forward movement",
    "close-up framing with tracking shot movement",
    "dutch angle framing with steady hold movement",
    "wide shot framing with rack focus movement",
    "low angle framing with pan movement",
    "high angle framing with tilt movement",
    "extreme close-up framing with zoom movement",
    "silhouette framing against backlight",
]
|
|
|
|
# Adjective placed before "visualization" in the rich emotional-weather prompt.
EMOTION_ADJS = [
    "cold",
    "thick",
    "raw",
    "beautiful",
    "heavy",
    "sharp",
    "bitter",
    "warm",
]

# Emotion word that leads an emotional-weather terse prompt.
EMOTION_NOUNS = [
    "rage",
    "grief",
    "joy",
    "fear",
    "hope",
    "despair",
    "love",
    "loneliness",
]

# Three-colour palette per emotion; the keys mirror EMOTION_NOUNS.
EMOTION_COLORS = {
    "rage":       ["blood red", "black", "orange"],
    "grief":      ["charcoal", "deep blue", "ash"],
    "joy":        ["bright gold", "warm white", "orange"],
    "fear":       ["sickly green", "purple", "grey"],
    "hope":       ["soft pink", "pale yellow", "white"],
    "despair":    ["mud brown", "dark grey", "faded blue"],
    "love":       ["rose", "crimson", "gold"],
    "loneliness": ["pale blue", "white", "grey"],
}

# Camera framings appended to the rich emotional-weather prompt.
EMOTION_FRAMING = [
    "wide shot",
    "close-up",
    "silhouette frame",
    "high angle",
    "low angle",
    "dutch angle",
    "bird's eye view",
    "subjective",
]
|
|
|
|
def generate_visual():
    """Build one terse/rich prompt pair for the "visual scenes" domain.

    The terse prompt is "<adjective> <noun>". The rich prompt expands it with
    a lighting phrase, two or three detail elements and a rendering style.

    Returns:
        dict with the keys "terse", "rich" and "domain".
    """
    # The order of RNG calls below determines the output for a given seed.
    adj = random.choice(VISUAL_ADJS)
    noun = random.choice(VISUAL_NOUNS)
    elements = random.sample(VISUAL_ELEMENTS, k=random.randint(2, 3))
    style = random.choice(VISUAL_STYLES)
    # Every option must read naturally after "bathed in", so the noun
    # "moonlight" is used rather than the adjective "moonlit".
    lighting = random.choice(
        ["moonlight", "golden hour", "storm light", "misty dawn", "midday glare", "twilight"]
    )

    # Several adjectives start with a vowel ("epic", "ethereal", "ancient",
    # "enchanted"), so the article has to follow the adjective.
    starts_with_vowel = adj.lower().startswith(("a", "e", "i", "o", "u"))
    article = "An" if starts_with_vowel else "A"

    terse = f"{adj} {noun}"
    rich = f"{article} {adj} {noun} bathed in {lighting}, {', '.join(elements)}, {style}."
    return {"terse": terse, "rich": rich, "domain": "visual scenes"}
|
|
|
|
def generate_music():
    """Build one terse/rich prompt pair for the "music moods" domain.

    The terse prompt is "<emotion> <instrument>". The rich prompt describes a
    visualization of that emotion using the emotion's own colour palette and
    a camera framing.

    Returns:
        dict with the keys "terse", "rich" and "domain".
    """
    emotion_adj = random.choice(MUSIC_EMOTIONS)
    instr = random.choice(MUSIC_INSTR)
    # Look the palette up by the chosen emotion. Picking a random entry of
    # MUSIC_COLORS.values() paired emotions with unrelated palettes.
    colors = MUSIC_COLORS[emotion_adj]
    framing = random.choice(MUSIC_FRAMING)

    terse = f"{emotion_adj} {instr}"
    # NOTE(review): the instrument appears only in the terse prompt, and the
    # word "bowing" is fixed regardless of instrument — confirm both are
    # intended.
    rich = f"A bowing visualization of {emotion_adj} through music. Colors: {', '.join(colors)}. {framing} framing."
    return {"terse": terse, "rich": rich, "domain": "music moods"}
|
|
|
|
def generate_dream():
    """Build one terse/rich prompt pair for the "dream descriptions" domain.

    The terse prompt is a phrase from DREAM_PHRASES; the rich prompt quotes
    that phrase and appends a sentence from DREAM_FRAMINGS.

    Returns:
        dict with the keys "terse", "rich" and "domain".
    """
    # The phrase is drawn before the framing; the order fixes seeded output.
    dream = random.choice(DREAM_PHRASES)
    camera = random.choice(DREAM_FRAMINGS)
    expanded = "A surreal dream visualization of '{}'. {}.".format(dream, camera)
    return {
        "terse": dream,
        "rich": expanded,
        "domain": "dream descriptions",
    }
|
|
|
|
def generate_emotional():
    """Build one terse/rich prompt pair for the "emotional weather" domain.

    The terse prompt is "<emotion> <weather>". The rich prompt describes a
    visualization of the emotion with an adjective, one colour drawn from the
    emotion's palette, and a camera framing.

    Returns:
        dict with the keys "terse", "rich" and "domain".
    """
    # The order of RNG calls below determines the output for a given seed.
    emotion = random.choice(EMOTION_NOUNS)
    weather = random.choice(["storm", "rain", "sunrise", "fog", "thunder", "blizzard", "drought", "sky"])
    adj = random.choice(EMOTION_ADJS)
    # NOTE(review): a single colour is drawn from the palette although the
    # label in the rich text reads "Colors:" — confirm this is intended.
    colors = random.choice(EMOTION_COLORS[emotion])
    framing = random.choice(EMOTION_FRAMING)

    # A fixed "An" was wrong for every current adjective ("An cold ...");
    # derive the article from the adjective so it stays right if the pool
    # changes.
    starts_with_vowel = adj.lower().startswith(("a", "e", "i", "o", "u"))
    article = "An" if starts_with_vowel else "A"

    terse = f"{emotion} {weather}"
    # NOTE(review): the weather word appears only in the terse prompt.
    rich = f"{article} {adj} visualization of {emotion}. Colors: {colors}. {framing} framing."
    return {"terse": terse, "rich": rich, "domain": "emotional weather"}
|
|
|
|
def main():
    """Generate the prompt-enhancement dataset and write it as JSONL.

    Command-line options:
        --seed     integer seed for the RNG (default 575).
        --dry-run  generate and count the pairs but do not write a file.
        --output   destination path (default OUTPUT_PATH).

    Each pair returned by a domain generator is tagged with an "id", a
    "model" and a UTC "timestamp" before being written, one JSON object per
    line.

    Returns:
        0 on success, for use as the process exit status.

    Raises:
        RuntimeError: if the number of generated pairs differs from the
            number requested.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seed", type=int, default=575)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--output", default=OUTPUT_PATH)
    args = parser.parse_args()

    random.seed(args.seed)

    # Domain name -> generator. Insertion order is the generation order,
    # which matters for seeded reproducibility.
    generators = {
        "visual scenes": generate_visual,
        "music moods": generate_music,
        "dream descriptions": generate_dream,
        "emotional weather": generate_emotional,
    }
    needed = {domain: 750 for domain in generators}
    total = sum(needed.values())
    print(f"Generating {total} prompt-enhancement pairs (seed={args.seed})")

    pairs = []
    for domain, count in needed.items():
        generate = generators[domain]
        slug = domain.replace(" ", "-")
        domain_pairs = []
        for i in range(count):
            pair = generate()
            pair["id"] = f"{slug}-{i + 1:04d}"
            pair["model"] = "generator-script"
            pair["timestamp"] = datetime.now(timezone.utc).isoformat()
            domain_pairs.append(pair)
        pairs.extend(domain_pairs)
        print(f" {domain}: {len(domain_pairs)} entries")

    # Explicit check rather than `assert`, which is stripped under -O.
    if len(pairs) != total:
        raise RuntimeError(f"Expected {total}, got {len(pairs)}")

    if args.dry_run:
        print("\nDry run — no output written.")
        return 0

    # dirname is "" for a bare filename such as "out.jsonl"; os.makedirs("")
    # raises, so only create the directory when there is one.
    out_dir = os.path.dirname(args.output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # ensure_ascii=False can emit non-ASCII text, so pin the file encoding
    # instead of relying on the platform default.
    with open(args.output, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(p, ensure_ascii=False) + "\n" for p in pairs)

    print(f"\nTotal: {len(pairs)} pairs")
    print(f"Output: {args.output}")
    return 0
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|