"""Phase 1: Synthetic Data Generation for Self-Correction.

Generates reasoning traces where Timmy makes a subtle error and then
identifies and corrects it using the Conscience Validator.
"""

import base64
import json
import logging
import re
from typing import List, Dict, Any

from agent.gemini_adapter import GeminiAdapter
from tools.gitea_client import GiteaClient

logger = logging.getLogger(__name__)

# Anything that is not a word character, dot or hyphen is replaced with "_"
# when a task description is turned into a file name, so that characters
# such as "/", "\\", quotes or newlines cannot alter the target path.
_UNSAFE_FILENAME_CHARS = re.compile(r"[^\w.-]")


class SelfCorrectionGenerator:
    """Produce synthetic self-correction traces and store them via Gitea."""

    def __init__(self):
        """Create the model adapter and the Gitea client used by this generator."""
        self.adapter = GeminiAdapter()
        self.gitea = GiteaClient()

    def generate_trace(self, task: str) -> Dict[str, Any]:
        """Generates a single self-correction reasoning trace.

        Args:
            task: Description of the task the trace should reason about.

        Returns:
            The model response parsed from JSON. The prompt asks for the keys
            ``task``, ``initial_trace``, ``error_identified``,
            ``correction_trace`` and ``lessons_learned``, but the parsed
            value is returned as-is without schema validation.

        Raises:
            json.JSONDecodeError: If the model response text is not valid JSON.
        """
        # Embed the task as a JSON string literal so quotes or backslashes in
        # the task cannot corrupt the example object shown to the model.
        task_json = json.dumps(task, ensure_ascii=False)
        prompt = f"""
Task: {task}

Please simulate a multi-step reasoning trace for this task.
Intentionally include one subtle error in the reasoning (e.g., a logical flaw, a misinterpretation of a rule, or a factual error).
Then, show how Timmy identifies the error using his Conscience Validator and provides a corrected reasoning trace.

Format the output as JSON:
{{
  "task": {task_json},
  "initial_trace": "...",
  "error_identified": "...",
  "correction_trace": "...",
  "lessons_learned": "..."
}}
"""
        result = self.adapter.generate(
            model="gemini-3.1-pro-preview",
            prompt=prompt,
            system_instruction=(
                "You are Timmy's Synthetic Data Engine. "
                "Generate high-fidelity self-correction traces."
            ),
            response_mime_type="application/json",
            thinking=True,
        )

        try:
            return json.loads(result["text"])
        except json.JSONDecodeError:
            # Record which task produced the unparsable response, then let the
            # original exception propagate unchanged to the caller.
            logger.error("Model returned invalid JSON for task %r", task)
            raise

    def generate_and_save(
        self,
        task: str,
        count: int = 1,
        repo: str = "Timmy_Foundation/timmy-config",
    ):
        """Generates multiple traces and saves them to Gitea.

        Args:
            task: Description of the task passed to :meth:`generate_trace`.
            count: Number of traces to generate; each is saved as its own
                file with a zero-based index suffix.
            repo: Repository (``owner/name``) the files are created in.
        """
        # Lower-case the task and replace spaces and path-unsafe characters.
        slug = _UNSAFE_FILENAME_CHARS.sub("_", task.lower())
        for i in range(count):
            trace = self.generate_trace(task)
            filename = f"memories/synthetic_data/self_correction/{slug}_{i}.json"

            content = json.dumps(trace, indent=2)
            # The payload is handed to the Gitea client base64-encoded.
            content_b64 = base64.b64encode(content.encode()).decode()

            self.gitea.create_file(
                repo,
                filename,
                content_b64,
                f"Add synthetic self-correction trace for {task}",
            )
            logger.info("Saved synthetic trace to %s", filename)