# nexus/trajectory_logger.py

"""
Nexus Trajectory Logger — AutoLoRA Training Data from Lived Experience

Every perceive→think→act cycle is a potential training sample.
This logger writes them in ShareGPT JSONL format, compatible with
the existing AutoLoRA pipeline (build_curated_dataset.py, train_modal.py).

The key insight: the model trains on its own embodied experiences.
Over time, the LoRA adapter shapes the base model into something
that was born in the Nexus, not fine-tuned toward it.
"""

import json
import time
from pathlib import Path
from typing import Optional

# Directory used for trajectory JSONL files when the caller does not pass
# an explicit log_dir to TrajectoryLogger.
DEFAULT_LOG_DIR = Path.home() / ".nexus" / "trajectories"


class TrajectoryLogger:
    """Record perceive→think→act cycles as ShareGPT-style JSONL records.

    Each logged cycle is appended to a per-day file named
    ``trajectory_YYYY-MM-DD.jsonl`` inside ``log_dir`` and is also kept in
    ``self.cycles`` for the lifetime of the logger (the list is never
    trimmed, so it grows with every logged cycle).
    """

    def __init__(self, log_dir: Optional[Path] = None, system_prompt: str = ""):
        """Create the log directory and start a new session.

        Args:
            log_dir: Directory for trajectory files. Falls back to
                ``DEFAULT_LOG_DIR`` when omitted or empty. A plain string
                path is accepted as well as a ``Path``.
            system_prompt: Text stored as the ``system`` turn of every cycle.
        """
        # Coerce so that a str path works with the Path API used below.
        self.log_dir = Path(log_dir) if log_dir else DEFAULT_LOG_DIR
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.system_prompt = system_prompt

        # Current session
        self.session_id = f"nexus_{int(time.time())}"
        self.cycles: list[dict] = []

        # Active log file — one per day. The day is remembered so that
        # log_cycle() can switch files when the local date changes.
        self._log_day = time.strftime("%Y-%m-%d")
        self.log_file = self.log_dir / f"trajectory_{self._log_day}.jsonl"

    def _roll_log_file(self) -> Path:
        """Switch ``self.log_file`` to today's file if the local date changed."""
        today = time.strftime("%Y-%m-%d")
        if today != self._log_day:
            self._log_day = today
            self.log_file = self.log_dir / f"trajectory_{today}.jsonl"
        return self.log_file

    @staticmethod
    def _has_meaningful_thought(cycle) -> bool:
        """Return True if any ``gpt`` turn in *cycle* passes the quality filter.

        A turn passes when its value is a string of at least 20 characters
        that does not contain "nothing has happened" (case-insensitive).
        Records or turns with an unexpected shape are treated as failing
        rather than raising.
        """
        if not isinstance(cycle, dict):
            return False
        for turn in cycle.get("conversations") or []:
            if not isinstance(turn, dict) or turn.get("from") != "gpt":
                continue
            value = turn.get("value")
            if not isinstance(value, str) or len(value) < 20:
                continue
            if "nothing has happened" in value.lower():
                continue
            return True
        return False

    def log_cycle(
        self,
        perception: str,
        thought: str,
        actions: list[str],
        cycle_ms: int = 0,
    ) -> str:
        """Log one perceive→think→act cycle as a training sample.

        The record starts with three turns: ``system`` (the configured
        system prompt), ``human`` (the perception) and ``gpt`` (the
        thought). Every non-empty entry of *actions* is then appended as an
        extra ``human`` turn prefixed with ``[World responds]: ``.

        Args:
            perception: What the world showed the model.
            thought: What the model produced in response.
            actions: Descriptions of resulting world effects; empty entries
                are skipped and ``None`` is treated as no actions.
            cycle_ms: Duration of the cycle in milliseconds.

        Returns:
            The id assigned to the logged cycle.
        """
        conversations = [
            {"from": "system", "value": self.system_prompt},
            {"from": "human", "value": perception},
            {"from": "gpt", "value": thought},
        ]
        # Actions are appended as context — the model learning
        # that certain thoughts lead to certain world-effects
        conversations.extend(
            {"from": "human", "value": f"[World responds]: {action_desc}"}
            for action_desc in (actions or [])
            if action_desc
        )

        cycle = {
            "id": f"{self.session_id}_cycle_{len(self.cycles)}",
            "model": "nexus-embodied",
            "started_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "cycle_ms": cycle_ms,
            "conversations": conversations,
            "message_count": len(conversations),
        }
        self.cycles.append(cycle)

        # Append to the daily log file, rolling over if the date changed
        # since the previous write.
        with open(self._roll_log_file(), "a", encoding="utf-8") as f:
            f.write(json.dumps(cycle) + "\n")

        return cycle["id"]

    def get_session_stats(self) -> dict:
        """Return counts for the cycles logged by this instance."""
        return {
            "session_id": self.session_id,
            "cycles": len(self.cycles),
            "log_file": str(self.log_file),
            "total_turns": sum(
                len(c["conversations"]) for c in self.cycles
            ),
        }

    def export_for_training(self, output_path: Optional[Path] = None) -> Path:
        """Merge all daily trajectory files into one filtered JSONL file.

        Reads every ``trajectory_*.jsonl`` file in ``log_dir`` in sorted
        name order, keeps only cycles with at least one meaningful ``gpt``
        turn, and writes them to the output file (overwriting it).

        Args:
            output_path: Destination file. Defaults to
                ``nexus_training_data.jsonl`` inside ``log_dir``.

        Returns:
            The path that was written.
        """
        output = output_path or (self.log_dir / "nexus_training_data.jsonl")

        quality_cycles = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            with open(traj_file, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        cycle = json.loads(line)
                    except json.JSONDecodeError:
                        # Deliberately skipped: one damaged line (e.g. a
                        # write cut short by a crash) must not abort the
                        # export of every other record.
                        continue
                    # Quality filter — only keep cycles where the model
                    # actually produced meaningful thought.
                    if self._has_meaningful_thought(cycle):
                        quality_cycles.append(cycle)

        # Written only after all inputs have been read.
        with open(output, "w", encoding="utf-8") as f:
            for cycle in quality_cycles:
                f.write(json.dumps(cycle) + "\n")

        return output

    def list_trajectory_files(self) -> list[dict]:
        """List every trajectory file in ``log_dir`` with basic stats.

        Returns:
            One dict per file, in sorted name order, with keys ``file``
            (path as a string), ``date`` (file stem without the
            ``trajectory_`` prefix), ``cycles`` (number of non-blank lines)
            and ``size_kb`` (file size in bytes divided by 1024).
        """
        files = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            with open(traj_file, encoding="utf-8") as f:
                count = sum(1 for line in f if line.strip())
            files.append({
                "file": str(traj_file),
                "date": traj_file.stem.replace("trajectory_", ""),
                "cycles": count,
                "size_kb": traj_file.stat().st_size / 1024,
            })
        return files
feat: Nexus Mind — embodied consciousness loop for 8B sovereign brain

Adds the perception adapter, experience store, trajectory logger, and
consciousness loop that give Timmy a body in the Nexus.

Architecture:
  BIRTH.md — Thin system prompt. SOUL.md conscience + embodied awareness. No meta-knowledge about implementation.
  perception_adapter — Translates WS events to natural-language sensory descriptions. Parses model output into WS actions.
  experience_store — SQLite-backed lived-experience memory. The model remembers only what it perceived through its channel.
  trajectory_logger — Logs every perceive→think→act cycle as ShareGPT JSONL, compatible with the AutoLoRA training pipeline.
  nexus_think — The consciousness loop. Connects to WS gateway, receives perceptions, thinks via Ollama, sends actions.

The 8B model wakes up knowing nothing but its values and what it experiences.
Training loops close on lived experience — emergence through the channel, not through fine-tuning toward behaviors.

Run: python -m nexus.nexus_think --model timmy:v0.1-q4 --ws ws://localhost:8765

2026-03-25 17:19:58 +00:00			`"""`
			`Nexus Trajectory Logger — AutoLoRA Training Data from Lived Experience`

			`Every perceive→think→act cycle is a potential training sample.`
			`This logger writes them in ShareGPT JSONL format, compatible with`
			`the existing AutoLoRA pipeline (build_curated_dataset.py, train_modal.py).`

			`The key insight: the model trains on its own embodied experiences.`
			`Over time, the LoRA adapter shapes the base model into something`
			`that was born in the Nexus, not fine-tuned toward it.`
			`"""`

			`import json`
			`import time`
			`from pathlib import Path`
			`from typing import Optional`

			`DEFAULT_LOG_DIR = Path.home() / ".nexus" / "trajectories"`


			`class TrajectoryLogger:`
			`def __init__(self, log_dir: Optional[Path] = None, system_prompt: str = ""):`
			`self.log_dir = log_dir or DEFAULT_LOG_DIR`
			`self.log_dir.mkdir(parents=True, exist_ok=True)`
			`self.system_prompt = system_prompt`

			`# Current session`
			`self.session_id = f"nexus_{int(time.time())}"`
			`self.cycles: list[dict] = []`

			`# Active log file — one per day`
			`today = time.strftime("%Y-%m-%d")`
			`self.log_file = self.log_dir / f"trajectory_{today}.jsonl"`

			`def log_cycle(`
			`self,`
			`perception: str,`
			`thought: str,`
			`actions: list[str],`
			`cycle_ms: int = 0,`
			`):`
			`"""Log one perceive→think→act cycle as a training sample.`

			`Format: ShareGPT JSONL — the same format used by`
			`build_curated_dataset.py and consumed by train_modal.py.`

			`The 'user' turn is the perception (what the world showed the model).`
			`The 'assistant' turn is the thought + action (what the model did).`
			`"""`
			`cycle = {`
			`"id": f"{self.session_id}_cycle_{len(self.cycles)}",`
			`"model": "nexus-embodied",`
			`"started_at": time.strftime("%Y-%m-%dT%H:%M:%S"),`
			`"cycle_ms": cycle_ms,`
			`"conversations": [`
			`{"from": "system", "value": self.system_prompt},`
			`{"from": "human", "value": perception},`
			`{"from": "gpt", "value": thought},`
			`],`
			`}`

			`# If actions produced responses (speech), add them as follow-up`
			`for action_desc in actions:`
			`if action_desc:`
			`# Actions are appended as context — the model learning`
			`# that certain thoughts lead to certain world-effects`
			`cycle["conversations"].append(`
			`{"from": "human", "value": f"[World responds]: {action_desc}"}`
			`)`

			`cycle["message_count"] = len(cycle["conversations"])`
			`self.cycles.append(cycle)`

			`# Append to daily log file`
			`with open(self.log_file, "a") as f:`
			`f.write(json.dumps(cycle) + "\n")`

			`return cycle["id"]`

			`def get_session_stats(self) -> dict:`
			`"""Stats for the current session."""`
			`return {`
			`"session_id": self.session_id,`
			`"cycles": len(self.cycles),`
			`"log_file": str(self.log_file),`
			`"total_turns": sum(`
			`len(c["conversations"]) for c in self.cycles`
			`),`
			`}`

			`def export_for_training(self, output_path: Optional[Path] = None) -> Path:`
			`"""Export all trajectory files into a single training-ready JSONL.`

			`Merges all daily trajectory files into one dataset that can be`
			`fed directly to the AutoLoRA pipeline.`
			`"""`
			`output = output_path or (self.log_dir / "nexus_training_data.jsonl")`

			`all_cycles = []`
			`for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):`
			`with open(traj_file) as f:`
			`for line in f:`
			`line = line.strip()`
			`if line:`
			`all_cycles.append(json.loads(line))`

			`# Quality filter — only keep cycles where the model actually`
			`# produced meaningful thought (not just "Nothing has happened")`
			`quality_cycles = []`
			`for cycle in all_cycles:`
			`convos = cycle.get("conversations", [])`
			`gpt_turns = [c for c in convos if c["from"] == "gpt"]`
			`for turn in gpt_turns:`
			`# Skip empty/trivial thoughts`
			`if len(turn["value"]) < 20:`
			`continue`
			`if "nothing has happened" in turn["value"].lower():`
			`continue`
			`quality_cycles.append(cycle)`
			`break`

			`with open(output, "w") as f:`
			`for cycle in quality_cycles:`
			`f.write(json.dumps(cycle) + "\n")`

			`return output`

			`def list_trajectory_files(self) -> list[dict]:`
			`"""List all trajectory files with stats."""`
			`files = []`
			`for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):`
			`count = 0`
			`with open(traj_file) as f:`
			`for line in f:`
			`if line.strip():`
			`count += 1`
			`files.append({`
			`"file": str(traj_file),`
			`"date": traj_file.stem.replace("trajectory_", ""),`
			`"cycles": count,`
			`"size_kb": traj_file.stat().st_size / 1024,`
			`})`
			`return files`