Files
the-nexus/nexus/trajectory_logger.py
perplexity 1ecca527cb
Some checks failed
CI / validate (pull_request) Failing after 5s
feat: Nexus Mind — embodied consciousness loop for 8B sovereign brain
Adds the perception adapter, experience store, trajectory logger, and
consciousness loop that give Timmy a body in the Nexus.

Architecture:
  BIRTH.md           — Thin system prompt. SOUL.md conscience + embodied
                       awareness. No meta-knowledge about implementation.
  perception_adapter — Translates WS events to natural-language sensory
                       descriptions. Parses model output into WS actions.
  experience_store   — SQLite-backed lived-experience memory. The model
                       remembers only what it perceived through its channel.
  trajectory_logger  — Logs every perceive→think→act cycle as ShareGPT JSONL,
                       compatible with the AutoLoRA training pipeline.
  nexus_think        — The consciousness loop. Connects to WS gateway,
                       receives perceptions, thinks via Ollama, sends actions.

The 8B model wakes up knowing nothing but its values and what it
experiences. Training loops close on lived experience — emergence
through the channel, not through fine-tuning toward behaviors.

Run: python -m nexus.nexus_think --model timmy:v0.1-q4 --ws ws://localhost:8765
2026-03-25 17:20:03 +00:00

144 lines
5.1 KiB
Python

"""
Nexus Trajectory Logger — AutoLoRA Training Data from Lived Experience
Every perceive→think→act cycle is a potential training sample.
This logger writes them in ShareGPT JSONL format, compatible with
the existing AutoLoRA pipeline (build_curated_dataset.py, train_modal.py).
The key insight: the model trains on its own embodied experiences.
Over time, the LoRA adapter shapes the base model into something
that was born in the Nexus, not fine-tuned toward it.
"""
import json
import time
from pathlib import Path
from typing import Optional
# Directory used for trajectory files when the logger is given no log_dir.
DEFAULT_LOG_DIR = Path.home().joinpath(".nexus", "trajectories")
class TrajectoryLogger:
    """Append perceive→think→act cycles to daily ShareGPT JSONL files.

    Each logged cycle becomes one JSON object on its own line in
    ``<log_dir>/trajectory_YYYY-MM-DD.jsonl``. The cycles of the current
    session are also kept in memory in ``self.cycles`` for statistics.
    """

    # A "gpt" turn shorter than this is treated as trivial by the export filter.
    _MIN_THOUGHT_CHARS = 20
    # Case-insensitive marker of an idle thought that carries no training signal.
    _IDLE_MARKER = "nothing has happened"

    def __init__(self, log_dir: Optional[Path] = None, system_prompt: str = ""):
        """Create the log directory and start a new session.

        Args:
            log_dir: Directory for trajectory files. A ``str`` is accepted and
                coerced to ``Path``. Falls back to ``DEFAULT_LOG_DIR`` when
                omitted.
            system_prompt: Text stored as the ``system`` turn of every cycle.
        """
        self.log_dir = Path(log_dir or DEFAULT_LOG_DIR)
        self.log_dir.mkdir(parents=True, exist_ok=True)
        self.system_prompt = system_prompt

        # Current session
        self.session_id = f"nexus_{int(time.time())}"
        self.cycles: list[dict] = []

        # Active log file — one per day (re-resolved on every log_cycle call)
        self.log_file = self._daily_log_file()

    def _daily_log_file(self) -> Path:
        """Return the trajectory file path for the current local date."""
        today = time.strftime("%Y-%m-%d")
        return self.log_dir / f"trajectory_{today}.jsonl"

    def log_cycle(
        self,
        perception: str,
        thought: str,
        actions: list[str],
        cycle_ms: int = 0,
    ):
        """Log one perceive→think→act cycle as a training sample.

        The record is a ShareGPT-style object: a ``system`` turn (the system
        prompt), a ``human`` turn (the perception), a ``gpt`` turn (the
        thought), then one ``human`` "[World responds]" turn per non-empty
        action description.

        Args:
            perception: What the world showed the model.
            thought: What the model produced in response.
            actions: Descriptions of the resulting actions; empty entries are
                ignored and ``None`` is treated as no actions.
            cycle_ms: Duration of the cycle in milliseconds.

        Returns:
            The id of the logged cycle, ``"<session_id>_cycle_<index>"``.
        """
        conversations = [
            {"from": "system", "value": self.system_prompt},
            {"from": "human", "value": perception},
            {"from": "gpt", "value": thought},
        ]
        # Actions are appended as context — the model learning that certain
        # thoughts lead to certain world-effects.
        conversations.extend(
            {"from": "human", "value": f"[World responds]: {action_desc}"}
            for action_desc in (actions or ())
            if action_desc
        )

        cycle = {
            "id": f"{self.session_id}_cycle_{len(self.cycles)}",
            "model": "nexus-embodied",
            # Local wall-clock time at the moment the cycle is logged.
            "started_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "cycle_ms": cycle_ms,
            "conversations": conversations,
            "message_count": len(conversations),
        }
        self.cycles.append(cycle)

        # Re-resolve the daily file so a session that runs past midnight
        # rolls over instead of writing into the previous day's file.
        self.log_file = self._daily_log_file()
        with open(self.log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(cycle) + "\n")

        return cycle["id"]

    def get_session_stats(self) -> dict:
        """Return counters for the current session.

        Returns:
            A dict with ``session_id``, ``cycles`` (number of cycles logged),
            ``log_file`` (path of the active daily file, as a string) and
            ``total_turns`` (conversation turns summed over all cycles).
        """
        return {
            "session_id": self.session_id,
            "cycles": len(self.cycles),
            "log_file": str(self.log_file),
            "total_turns": sum(len(c["conversations"]) for c in self.cycles),
        }

    @classmethod
    def _is_quality_cycle(cls, cycle) -> bool:
        """Return True if any ``gpt`` turn holds a non-trivial thought."""
        if not isinstance(cycle, dict):
            return False
        for turn in cycle.get("conversations") or []:
            if not isinstance(turn, dict) or turn.get("from") != "gpt":
                continue
            text = turn.get("value")
            if not isinstance(text, str):
                continue
            if len(text) < cls._MIN_THOUGHT_CHARS:
                continue
            if cls._IDLE_MARKER in text.lower():
                continue
            return True
        return False

    def export_for_training(self, output_path: Optional[Path] = None) -> Path:
        """Merge all daily trajectory files into one filtered JSONL dataset.

        Only cycles with at least one ``gpt`` turn of 20 or more characters
        that does not contain "nothing has happened" (case-insensitive) are
        kept. Lines that are not valid JSON (for example a line truncated by
        an interrupted write) and records without the expected structure are
        skipped rather than aborting the export.

        Args:
            output_path: Destination file; a ``str`` is accepted. Defaults to
                ``<log_dir>/nexus_training_data.jsonl``.

        Returns:
            The path of the written dataset.
        """
        output = Path(output_path or (self.log_dir / "nexus_training_data.jsonl"))
        output_resolved = output.resolve()

        quality_cycles = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            # An earlier export stored under a matching name must not be fed
            # back in, or every sample would be duplicated.
            if traj_file.resolve() == output_resolved:
                continue
            with open(traj_file, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        cycle = json.loads(line)
                    except json.JSONDecodeError:
                        continue
                    if self._is_quality_cycle(cycle):
                        quality_cycles.append(cycle)

        with open(output, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(cycle) + "\n" for cycle in quality_cycles)

        return output

    def list_trajectory_files(self) -> list[dict]:
        """List all trajectory files with stats.

        Returns:
            One dict per ``trajectory_*.jsonl`` file in ``log_dir``, sorted by
            path, with ``file`` (path string), ``date`` (file stem without the
            ``trajectory_`` prefix), ``cycles`` (number of non-blank lines)
            and ``size_kb`` (file size in KiB).
        """
        files = []
        for traj_file in sorted(self.log_dir.glob("trajectory_*.jsonl")):
            # Binary mode: counting lines needs no decoding, so a stray
            # non-UTF-8 byte cannot break the listing.
            with open(traj_file, "rb") as f:
                count = sum(1 for line in f if line.strip())
            files.append({
                "file": str(traj_file),
                "date": traj_file.stem.replace("trajectory_", ""),
                "cycles": count,
                "size_kb": traj_file.stat().st_size / 1024,
            })
        return files