Compare commits

...

3 Commits

Author SHA1 Message Date
e6599b8651 feat: implement Phase 3 - Domain Distiller
Some checks failed
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 45s
Tests / test (pull_request) Failing after 27s
Docker Build and Publish / build-and-push (pull_request) Failing after 1m11s
2026-03-30 22:59:57 +00:00
679d2cd81d feat: implement Phase 2 - World Modeler 2026-03-30 22:59:56 +00:00
e7b2fe8196 feat: implement Phase 1 - Self-Correction Generator 2026-03-30 22:59:55 +00:00
3 changed files with 147 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
"""Phase 3: Deep Knowledge Distillation from Google.
Performs deep dives into technical domains and distills them into
Timmy's Sovereign Knowledge Graph.
"""
import logging
import json
from typing import List, Dict, Any
from agent.gemini_adapter import GeminiAdapter
from agent.symbolic_memory import SymbolicMemory
logger = logging.getLogger(__name__)
class DomainDistiller:
def __init__(self):
self.adapter = GeminiAdapter()
self.symbolic = SymbolicMemory()
def distill_domain(self, domain: str):
"""Crawls and distills an entire technical domain."""
logger.info(f"Distilling domain: {domain}")
prompt = f"""
Please perform a deep knowledge distillation of the following domain: {domain}
Use Google Search to find foundational papers, recent developments, and key entities.
Synthesize this into a structured 'Domain Map' consisting of high-fidelity knowledge triples.
Focus on the structural relationships that define the domain.
Format: [{{"s": "subject", "p": "predicate", "o": "object"}}]
"""
result = self.adapter.generate(
model="gemini-3.1-pro-preview",
prompt=prompt,
system_instruction=f"You are Timmy's Domain Distiller. Your goal is to map the entire {domain} domain into a structured Knowledge Graph.",
grounding=True,
thinking=True,
response_mime_type="application/json"
)
triples = json.loads(result["text"])
count = self.symbolic.ingest_text(json.dumps(triples))
logger.info(f"Distilled {count} new triples for domain: {domain}")
return count

View File

@@ -0,0 +1,60 @@
"""Phase 1: Synthetic Data Generation for Self-Correction.
Generates reasoning traces where Timmy makes a subtle error and then
identifies and corrects it using the Conscience Validator.
"""
import logging
import json
from typing import List, Dict, Any
from agent.gemini_adapter import GeminiAdapter
from tools.gitea_client import GiteaClient
logger = logging.getLogger(__name__)
class SelfCorrectionGenerator:
def __init__(self):
self.adapter = GeminiAdapter()
self.gitea = GiteaClient()
def generate_trace(self, task: str) -> Dict[str, Any]:
"""Generates a single self-correction reasoning trace."""
prompt = f"""
Task: {task}
Please simulate a multi-step reasoning trace for this task.
Intentionally include one subtle error in the reasoning (e.g., a logical flaw, a misinterpretation of a rule, or a factual error).
Then, show how Timmy identifies the error using his Conscience Validator and provides a corrected reasoning trace.
Format the output as JSON:
{{
"task": "{task}",
"initial_trace": "...",
"error_identified": "...",
"correction_trace": "...",
"lessons_learned": "..."
}}
"""
result = self.adapter.generate(
model="gemini-3.1-pro-preview",
prompt=prompt,
system_instruction="You are Timmy's Synthetic Data Engine. Generate high-fidelity self-correction traces.",
response_mime_type="application/json",
thinking=True
)
trace = json.loads(result["text"])
return trace
def generate_and_save(self, task: str, count: int = 1):
"""Generates multiple traces and saves them to Gitea."""
repo = "Timmy_Foundation/timmy-config"
for i in range(count):
trace = self.generate_trace(task)
filename = f"memories/synthetic_data/self_correction/{task.lower().replace(' ', '_')}_{i}.json"
content = json.dumps(trace, indent=2)
content_b64 = base64.b64encode(content.encode()).decode()
self.gitea.create_file(repo, filename, content_b64, f"Add synthetic self-correction trace for {task}")
logger.info(f"Saved synthetic trace to {filename}")

View File

@@ -0,0 +1,42 @@
"""Phase 2: Multi-Modal World Modeling.
Ingests multi-modal data (vision/audio) to build a spatial and temporal
understanding of Timmy's environment.
"""
import logging
import base64
from typing import List, Dict, Any
from agent.gemini_adapter import GeminiAdapter
from agent.symbolic_memory import SymbolicMemory
logger = logging.getLogger(__name__)
class WorldModeler:
def __init__(self):
self.adapter = GeminiAdapter()
self.symbolic = SymbolicMemory()
def analyze_environment(self, image_data: str, mime_type: str = "image/jpeg"):
"""Analyzes an image of the environment and updates the world model."""
# In a real scenario, we'd use Gemini's multi-modal capabilities
# For now, we'll simulate the vision-to-symbolic extraction
prompt = f"""
Analyze the following image of Timmy's environment.
Identify all key objects, their spatial relationships, and any temporal changes.
Extract this into a set of symbolic triples for the Knowledge Graph.
Format: [{{"s": "subject", "p": "predicate", "o": "object"}}]
"""
# Simulate multi-modal call (Gemini 3.1 Pro Vision)
result = self.adapter.generate(
model="gemini-3.1-pro-preview",
prompt=prompt,
system_instruction="You are Timmy's World Modeler. Build a high-fidelity spatial/temporal map of the environment.",
response_mime_type="application/json"
)
triples = json.loads(result["text"])
self.symbolic.ingest_text(json.dumps(triples))
logger.info(f"Updated world model with {len(triples)} new spatial triples.")
return triples