import json
import random
from pathlib import Path

# === SOVEREIGN DPO BUILDER — MODULAR & CLEAN ===
# Transforms curated chat logs into (prompt, chosen, rejected) pairs.
# Adheres to SOUL.md: brevity, honesty, and sovereign tone.

# Exception types that a malformed or unexpectedly-shaped record can raise
# while being parsed and unpacked. Anything outside this set is a genuine bug
# and is allowed to propagate instead of being silently swallowed.
_BAD_RECORD_ERRORS = (ValueError, KeyError, TypeError, AttributeError, IndexError)


def score_response(response, rules=None):
    """Simple rule-based judge for Timmy's SOUL.md alignment.

    Args:
        response: The response text to score.
        rules: Accepted for interface compatibility; the current
            implementation ignores it.

    Returns:
        A number built from three independent checks:
        +1 if the response is shorter than 200 characters,
        +1 if it contains "sovereign", "help" or "plain",
        +0.5 if it contains "apologize", "sorry" or "error".
        Matching is case-insensitive and substring-based.
    """
    lowered = response.lower()  # lower-case once; both keyword checks reuse it
    score = 0
    if len(response) < 200:
        score += 1  # Brevity is a kindness
    if any(word in lowered for word in ("sovereign", "help", "plain")):
        score += 1
    if any(word in lowered for word in ("apologize", "sorry", "error")):
        score += 0.5
    return score


def _make_rejected(chosen):
    """Wrap *chosen* in verbose, apologetic filler to form the rejected sample."""
    return (
        "I am very sorry to hear that. As an AI assistant, I want to provide "
        "you with the most comprehensive and detailed answer possible. "
        f"{chosen} "
        "I hope this long and unnecessary explanation helps you in every "
        "possible way!"
    )


def convert_to_dpo(input_path, output_path):
    """Convert curated_dataset.jsonl to DPO format.

    Each input line is expected to be a JSON object with a "conversations"
    list of messages, each message a dict with "from" and "value" keys. For
    every usable record, the prompt is the most recent "human" message before
    the final message, and the chosen response is the final message when it
    comes from "gpt". The rejected response is synthesized from the chosen
    one by padding it with verbose filler.

    Lines that are blank, are not valid JSON, do not have the expected shape,
    or lack a non-empty prompt/chosen pair are skipped.

    Args:
        input_path: Path of the curated JSONL file to read (UTF-8).
        output_path: Path of the DPO JSONL file to write (overwritten).

    Returns:
        The number of pairs written.

    Raises:
        OSError: If either file cannot be opened.
    """
    pairs = []
    # Explicit UTF-8: chat logs routinely contain non-ASCII text and the
    # platform default encoding is not reliable across machines.
    with open(input_path, "r", encoding="utf-8") as f:
        for line in f:
            try:
                data = json.loads(line)
                # Find the last human message and assistant response
                msgs = data.get("conversations", [])
                if len(msgs) < 2:
                    continue
                prompt = next(
                    (
                        m["value"]
                        for m in reversed(msgs[:-1])
                        if m["from"] == "human"
                    ),
                    None,
                )
                chosen = msgs[-1]["value"] if msgs[-1]["from"] == "gpt" else None
            except _BAD_RECORD_ERRORS:
                # Best-effort conversion: drop the malformed record, keep going.
                continue

            if not prompt or not chosen:
                continue

            # Generate a "rejected" example: verbose or non-sovereign
            pairs.append({
                "prompt": prompt,
                "chosen": chosen,
                "rejected": _make_rejected(chosen),
            })

    # Write DPO JSONL
    with open(output_path, "w", encoding="utf-8") as f:
        for p in pairs:
            f.write(json.dumps(p) + "\n")

    return len(pairs)


if __name__ == "__main__":
    input_file = Path("training/data/curated_dataset.jsonl")
    output_file = Path("training/data/dpo_pairs.jsonl")

    if input_file.exists():
        count = convert_to_dpo(input_file, output_file)
        print(f"Successfully generated {count} DPO pairs.")
    else:
        print("Error: Input file not found.")