"""Nexus Trajectory Logger — AutoLoRA Training Data from Lived Experience.

Every perceive→think→act cycle is a potential training sample. This logger
writes them in ShareGPT JSONL format, compatible with the existing AutoLoRA
pipeline (build_curated_dataset.py, train_modal.py).

The key insight: the model trains on its own embodied experiences. Over time,
the LoRA adapter shapes the base model into something that was born in the
Nexus, not fine-tuned toward it.
"""

import json
import logging
import time
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

DEFAULT_LOG_DIR = Path.home() / ".nexus" / "trajectories"

# Thresholds for the quality filter applied by export_for_training().
_MIN_THOUGHT_CHARS = 20
_TRIVIAL_THOUGHT_MARKER = "nothing has happened"


class TrajectoryLogger:
    """Append perceive→think→act cycles to daily ShareGPT-style JSONL files.

    Each logged cycle is written to ``trajectory_<YYYY-MM-DD>.jsonl`` inside
    ``log_dir`` and is also kept in ``self.cycles`` for session statistics.
    """

    def __init__(self, log_dir: Optional[Path] = None, system_prompt: str = ""):
        """Create the log directory and start a new session.

        Args:
            log_dir: Directory for trajectory files (``Path`` or ``str``).
                Falls back to ``DEFAULT_LOG_DIR`` when omitted or empty.
            system_prompt: Text stored as the ``system`` turn of every cycle.
        """
        # Coerce so that a plain string path works as well as a Path.
        self.log_dir = Path(log_dir) if log_dir else DEFAULT_LOG_DIR
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.system_prompt = system_prompt

        # Current session
        self.session_id = f"nexus_{int(time.time())}"
        self.cycles: list[dict] = []

        # Active log file — one per day (refreshed on every log_cycle call).
        self.log_file = self._daily_log_file()

    def _daily_log_file(self) -> Path:
        """Return the trajectory file path for the current local date."""
        today = time.strftime("%Y-%m-%d")
        return self.log_dir / f"trajectory_{today}.jsonl"

    def log_cycle(
        self,
        perception: str,
        thought: str,
        actions: list[str],
        cycle_ms: int = 0,
    ):
        """Log one perceive→think→act cycle as a training sample.

        Format: ShareGPT JSONL — the same format used by
        build_curated_dataset.py and consumed by train_modal.py.

        The conversation is built as:
          * ``system`` — the logger's system prompt,
          * ``human``  — the perception (what the world showed the model),
          * ``gpt``    — the thought (what the model produced),
          * one extra ``human`` turn, prefixed ``[World responds]: ``, for
            every non-empty entry in ``actions``.

        Args:
            perception: Text of what the model perceived.
            thought: Text of the model's response.
            actions: Descriptions of resulting world-effects; empty entries
                are skipped and ``None`` is treated as no actions.
            cycle_ms: Duration of the cycle in milliseconds.

        Returns:
            The id assigned to the cycle
            (``<session_id>_cycle_<index within session>``).
        """
        conversations = [
            {"from": "system", "value": self.system_prompt},
            {"from": "human", "value": perception},
            {"from": "gpt", "value": thought},
        ]
        # Actions are appended as context — the model learning that certain
        # thoughts lead to certain world-effects.
        conversations.extend(
            {"from": "human", "value": f"[World responds]: {action_desc}"}
            for action_desc in (actions or ())
            if action_desc
        )

        cycle = {
            "id": f"{self.session_id}_cycle_{len(self.cycles)}",
            "model": "nexus-embodied",
            "started_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "cycle_ms": cycle_ms,
            "conversations": conversations,
            "message_count": len(conversations),
        }
        self.cycles.append(cycle)

        # Re-resolve the daily file on each write so a session that runs past
        # midnight rolls over to the new day's file instead of continuing to
        # append to the file chosen at construction time.
        self.log_file = self._daily_log_file()
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(cycle) + "\n")

        return cycle["id"]

    def get_session_stats(self) -> dict:
        """Return stats for the current session.

        Returns:
            Dict with ``session_id``, ``cycles`` (count logged this session),
            ``log_file`` (path of the active daily file, as ``str``) and
            ``total_turns`` (conversation turns summed over the session).
        """
        return {
            "session_id": self.session_id,
            "cycles": len(self.cycles),
            "log_file": str(self.log_file),
            "total_turns": sum(len(c["conversations"]) for c in self.cycles),
        }

    @staticmethod
    def _is_quality_cycle(cycle) -> bool:
        """Return True if any ``gpt`` turn holds a non-trivial thought.

        A thought is trivial when it is shorter than ``_MIN_THOUGHT_CHARS``
        characters or contains "nothing has happened" (case-insensitive).
        Records or turns that do not have the expected shape are rejected
        rather than raising.
        """
        if not isinstance(cycle, dict):
            return False
        for turn in cycle.get("conversations") or []:
            if not isinstance(turn, dict) or turn.get("from") != "gpt":
                continue
            value = turn.get("value")
            if not isinstance(value, str):
                continue
            if len(value) < _MIN_THOUGHT_CHARS:
                continue
            if _TRIVIAL_THOUGHT_MARKER in value.lower():
                continue
            return True
        return False

    def export_for_training(self, output_path: Optional[Path] = None) -> Path:
        """Export all trajectory files into a single training-ready JSONL.

        Merges every ``trajectory_*.jsonl`` file in ``log_dir`` (in sorted
        filename order), keeps only the cycles that pass the quality filter,
        and writes them to one file, overwriting it if it exists.

        Lines that are not valid JSON (for example a line truncated by an
        interrupted write) are skipped with a warning instead of aborting
        the whole export.

        Args:
            output_path: Destination file (``Path`` or ``str``). Defaults to
                ``nexus_training_data.jsonl`` inside ``log_dir``.

        Returns:
            Path of the written dataset.
        """
        if output_path:
            output = Path(output_path)
        else:
            output = self.log_dir / "nexus_training_data.jsonl"

        # Quality filter — only keep cycles where the model actually
        # produced meaningful thought (not just "Nothing has happened").
        quality_cycles = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            with open(traj_file, encoding="utf-8") as f:
                for lineno, raw in enumerate(f, start=1):
                    line = raw.strip()
                    if not line:
                        continue
                    try:
                        cycle = json.loads(line)
                    except json.JSONDecodeError:
                        logger.warning(
                            "Skipping malformed line %d in %s", lineno, traj_file
                        )
                        continue
                    if self._is_quality_cycle(cycle):
                        quality_cycles.append(cycle)

        # All inputs are read before the output is opened for writing.
        with open(output, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(cycle) + "\n" for cycle in quality_cycles)

        return output

    def list_trajectory_files(self) -> list[dict]:
        """List all trajectory files with stats.

        Returns:
            One dict per ``trajectory_*.jsonl`` file in ``log_dir`` (sorted by
            filename) with ``file`` (path as ``str``), ``date`` (the filename
            stem without its ``trajectory_`` prefix), ``cycles`` (number of
            non-blank lines) and ``size_kb`` (file size in KiB).
        """
        files = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            with open(traj_file, encoding="utf-8") as f:
                count = sum(1 for line in f if line.strip())
            files.append({
                "file": str(traj_file),
                "date": traj_file.stem.removeprefix("trajectory_"),
                "cycles": count,
                "size_kb": traj_file.stat().st_size / 1024,
            })
        return files