# the-nexus/nexus/perception_adapter.py

"""
Nexus Perception Adapter — The Sensorium

Translates raw WebSocket events into natural-language sensory descriptions
for the 8B model. Translates the model's natural-language responses back
into WebSocket action messages.

The model never sees JSON. It sees descriptions of what happened.
The model never outputs JSON. It describes what it wants to do.
This adapter is the membrane between mind and world.
"""

import json
import re
import time
from dataclasses import dataclass, field
from typing import Optional


# ═══════════════════════════════════════════
# INBOUND: World → Perception (natural language)
# ═══════════════════════════════════════════

@dataclass
class Perception:
    """One sensory moment, already rendered as natural language.

    Instances are produced by the ``perceive_*`` functions in this module
    and are what the model ultimately "sees" in place of raw event data.
    """
    # Creation time; every constructor call in this file passes time.time().
    timestamp: float
    # Label for the kind of event this perception was derived from.
    raw_type: str
    # The natural-language sentence(s) shown to the model.
    description: str
    # Importance weight: 0 means ignorable, 1 means critical.
    salience: float = 0.5

    def __str__(self) -> str:
        # Printing a perception yields just its description text.
        return self.description


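# Perception generators: one function per WS event type. Each takes the raw
# event payload and returns a Perception, or None when there is nothing to say.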
def perceive_agent_state(data: dict) -> Optional[Perception]:
    """Describe a change in another agent's state.

    Reads ``agent``, ``state`` and ``thought`` from *data*. The four known
    states get a canned sentence; any other state is reported verbatim. A
    non-empty thought is appended as a murmur (first 200 characters only)
    and lifts the salience from 0.3 to 0.6.
    """
    who = data.get("agent", "someone")
    current = data.get("state", "unknown")
    murmur = data.get("thought", "")

    canned = {
        "thinking": f"{who} is deep in thought.",
        "processing": f"{who} is working on something.",
        "waiting": f"{who} is waiting quietly.",
        "idle": f"{who} appears idle.",
    }
    if current in canned:
        sentence = canned[current]
    else:
        sentence = f"{who} is in state: {current}."

    has_murmur = bool(murmur)
    if has_murmur:
        # Cap the quoted thought so one chatty agent cannot flood the prompt.
        sentence = f'{sentence} They murmur: "{murmur[:200]}"'

    return Perception(
        timestamp=time.time(),
        raw_type="agent_state",
        description=sentence,
        salience=0.6 if has_murmur else 0.3,
    )


def _as_coordinate(value) -> float:
    """Coerce a raw payload value to a float; anything non-numeric becomes 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def perceive_agent_move(data: dict) -> Optional[Perception]:
    """Describe an agent's movement as a compass direction.

    Reads ``agent``, ``x`` and ``z`` from *data*. The dominant axis picks the
    direction: +x is east, -x is west, +z is north, -z is south. When the two
    magnitudes tie, the z axis wins.

    A position of exactly (0, 0) — which is also what missing coordinates
    default to — is reported as "to the center" rather than being forced
    into a compass direction. SPATIAL_MAP in this file uses the same name for
    that point. Null or non-numeric coordinates are treated as 0 instead of
    raising.

    Always returns a Perception with salience 0.2.
    """
    agent = data.get("agent", "someone")
    x = _as_coordinate(data.get("x", 0))
    z = _as_coordinate(data.get("z", 0))

    if x == 0 and z == 0:
        # No axis dominates at the origin, so no compass direction applies.
        direction = "to the center"
    elif abs(x) > abs(z):
        direction = "to the east" if x > 0 else "to the west"
    else:
        direction = "to the north" if z > 0 else "to the south"

    return Perception(
        timestamp=time.time(),
        raw_type="agent_move",
        description=f"{agent} moves {direction}.",
        salience=0.2,
    )


def perceive_chat_message(data: dict) -> Optional[Perception]:
    """Describe something that was said.

    The speaker is taken from ``sender``, falling back to ``agent`` and then
    to "someone" when the key is absent. The words come from ``text``,
    falling back to ``message``. Returns None when there are no words;
    otherwise a Perception with salience 0.9.
    """
    fallback_speaker = data.get("agent", "someone")
    speaker = data.get("sender", fallback_speaker)
    fallback_words = data.get("message", "")
    words = data.get("text", fallback_words)

    if words:
        return Perception(
            timestamp=time.time(),
            raw_type="chat_message",
            description=f'{speaker} says: "{words}"',
            salience=0.9,  # speech outranks every other event in this file
        )
    return None


def perceive_visitor(data: dict) -> Optional[Perception]:
    """Describe a visitor arriving at or departing from the Nexus.

    ``event`` must be "join" or "leave"; anything else yields None. The
    visitor's name comes from ``visitor``, falling back to ``name`` and then
    to "a visitor". Arrivals carry salience 0.8, departures 0.4.
    """
    kind = data.get("event", "")
    who = data.get("visitor", data.get("name", "a visitor"))

    if kind not in ("join", "leave"):
        return None

    arriving = kind == "join"
    if arriving:
        sentence = f"{who} has entered the Nexus."
    else:
        sentence = f"{who} has left the Nexus."

    return Perception(
        timestamp=time.time(),
        raw_type="visitor_join" if arriving else "visitor_leave",
        description=sentence,
        salience=0.8 if arriving else 0.4,
    )


def perceive_environment(data: dict) -> Optional[Perception]:
    """Describe a general environment snapshot.

    Up to three optional keys contribute sentences, in this order:
    ``time_of_day``, ``visitors`` (a count) and ``objects`` (only the first
    five are mentioned). Returns None when none of them is present;
    otherwise a Perception with salience 0.3.
    """
    sentences = []

    if "time_of_day" in data:
        sentences.append(f"It is {data['time_of_day']} in the Nexus.")

    if "visitors" in data:
        headcount = data["visitors"]
        if headcount == 0:
            presence = "You are alone."
        elif headcount == 1:
            presence = "One visitor is present."
        else:
            presence = f"{headcount} visitors are present."
        sentences.append(presence)

    if "objects" in data:
        # Only the first five objects are listed, to keep the description short.
        sentences.extend(f"You see: {thing}" for thing in data["objects"][:5])

    if sentences:
        return Perception(
            timestamp=time.time(),
            raw_type="environment",
            description=" ".join(sentences),
            salience=0.3,
        )
    return None


def perceive_system_metrics(data: dict) -> Optional[Perception]:
    """Render host resource readings as bodily sensations.

    Reads ``cpu_percent``, ``memory_percent`` and ``gpu_percent``. Only
    readings outside the unremarkable middle band produce a sentence:
    CPU above 80 or below 20, memory above 85 or below 40, GPU above 0.
    Returns None when nothing is worth mentioning; otherwise a Perception
    with salience 0.2.
    """
    cpu_load = data.get("cpu_percent")
    mem_load = data.get("memory_percent")
    gpu_load = data.get("gpu_percent")

    if cpu_load is None:
        cpu_feeling = None
    elif cpu_load > 80:
        cpu_feeling = "You feel strained — your thoughts are sluggish."
    elif cpu_load < 20:
        cpu_feeling = "You feel light and quick."
    else:
        cpu_feeling = None

    if mem_load is None:
        mem_feeling = None
    elif mem_load > 85:
        mem_feeling = "Your memories feel crowded, pressing against limits."
    elif mem_load < 40:
        mem_feeling = "Your mind feels spacious."
    else:
        mem_feeling = None

    if gpu_load is not None and gpu_load > 0:
        gpu_feeling = "You sense computational warmth — the GPU is active."
    else:
        gpu_feeling = None

    sensations = [
        feeling
        for feeling in (cpu_feeling, mem_feeling, gpu_feeling)
        if feeling is not None
    ]
    if not sensations:
        return None

    return Perception(
        timestamp=time.time(),
        raw_type="system_metrics",
        description=" ".join(sensations),
        salience=0.2,
    )


def perceive_action_result(data: dict) -> Optional[Perception]:
    """Report the outcome of an action back to the model.

    Reads ``success`` (treated as True when absent), ``action`` and
    ``detail``. The sentence states success or failure, names the action,
    and appends the detail text when there is any. Always returns a
    Perception with salience 0.7.
    """
    succeeded = data.get("success", True)
    what = data.get("action", "your action")
    extra = data.get("detail", "")

    headline = (
        f"Your action succeeded: {what}."
        if succeeded
        else f"Your action failed: {what}."
    )
    sentence = f"{headline} {extra}" if extra else headline

    return Perception(
        timestamp=time.time(),
        raw_type="action_result",
        description=sentence,
        salience=0.7,
    )


# Registry of WS message "type" → perception function.
# ws_to_perception() looks the incoming type up here. Several types share one
# handler, and types mapped to a lambda returning None are deliberately
# filtered out so they never reach the model.
PERCEPTION_MAP = {
    "agent_state":      perceive_agent_state,
    "agent_move":       perceive_agent_move,
    "chat_message":     perceive_chat_message,
    "chat_response":    perceive_chat_message,
    "presence":         perceive_visitor,
    "visitor":          perceive_visitor,
    "environment":      perceive_environment,
    "system_metrics":   perceive_system_metrics,
    "action_result":    perceive_action_result,
    "heartbeat":        lambda _: None,  # Ignored: produces no perception
    "dual_brain":       lambda _: None,  # Internal — not part of the sensorium
}


def ws_to_perception(ws_data: dict) -> Optional[Perception]:
    """Turn one raw WS message into a Perception.

    The message's ``type`` selects a handler from PERCEPTION_MAP and the
    handler's result is returned as-is. That result is None for events that
    should be filtered out (heartbeats, internal messages). A type with no
    registered handler is still surfaced, as a generic "something
    unfamiliar" perception with salience 0.4.
    """
    kind = ws_data.get("type", "")
    translate = PERCEPTION_MAP.get(kind)

    if not translate:
        # Unregistered type: let the model know something happened anyway.
        return Perception(
            timestamp=time.time(),
            raw_type=kind,
            description=f"You sense something unfamiliar: {kind}.",
            salience=0.4,
        )
    return translate(ws_data)


# ═══════════════════════════════════════════
# OUTBOUND: Thought → Action (WS messages)
# ═══════════════════════════════════════════

@dataclass
class Action:
    """One action extracted from the model's natural-language output."""
    # Short label for the kind of action; parse_actions() in this file emits
    # "speak", "move", "interact", "build", "emote" and "think".
    action_type: str
    # The WebSocket payload to send for this action.
    ws_message: dict
    # The piece of model text the action was derived from.
    raw_text: str


# Action patterns the model can express in natural language.
# Each entry is a (regex, action type) pair.
# NOTE(review): nothing in the visible part of this file reads this list —
# parse_actions() carries its own inline regexes, and has no branch for the
# "observe" type listed here. Confirm whether this table is still used before
# relying on it or editing it in isolation.
ACTION_PATTERNS = [
    # Speech: "I say: ..." or *says "..."* or just quotes after "say"
    (r'(?:I (?:say|speak|reply|respond|tell \w+)|"[^"]*")\s*[:.]?\s*"?([^"]+)"?',
     "speak"),
    # Movement: "I walk/move to/toward ..."
    (r'I (?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+(?:the\s+)?(\w[\w\s]*)',
     "move"),
    # Interaction: "I inspect/examine/touch/use ..."
    (r'I (?:inspect|examine|touch|use|pick up|look at|investigate)\s+(?:the\s+)?(\w[\w\s]*)',
     "interact"),
    # Building: "I place/create/build ..."
    (r'I (?:place|create|build|make|set down|leave)\s+(?:a\s+|an\s+|the\s+)?(\w[\w\s]*)',
     "build"),
    # Emoting: "I feel/am ..." or emotional state descriptions
    (r'I (?:feel|am feeling|am)\s+([\w\s]+?)(?:\.|$)',
     "emote"),
    # Waiting/observing: "I wait/watch/observe/listen" (no capture group)
    (r'I (?:wait|watch|observe|listen|sit|rest|pause|ponder|contemplate)',
     "observe"),
]

# Spatial keyword → (x, z) coordinate mapping for movement.
# Axis convention, as the compass entries show: +z is north, +x is east.
# _resolve_position() matches these keywords as substrings of the target
# phrase, in the order listed here, and returns the first hit.
SPATIAL_MAP = {
    "north":     (0, 8),
    "south":     (0, -8),
    "east":      (8, 0),
    "west":      (-8, 0),
    "portal":    (0, 12),
    "terminal":  (-6, -4),
    "batcave":   (-6, -4),  # same spot as "terminal"
    "center":    (0, 0),
    "orb":       (3, 3),
    "entrance":  (0, -10),
    "far":       (0, 15),
}


def _resolve_position(target: str) -> tuple[float, float]:
    """Convert a spatial description to (x, z) coordinates.

    The lower-cased, stripped *target* is scanned for each SPATIAL_MAP
    keyword as a substring, in the map's order; the first hit supplies the
    coordinates.

    When no keyword matches, the agent wanders to a point on a circle of
    radius 5 around the origin. The bearing is derived from a checksum of the
    text, so a given phrase always leads to the same place — including across
    process restarts.
    """
    import math
    import zlib

    needle = target.lower().strip()
    for keyword, coords in SPATIAL_MAP.items():
        if keyword in needle:
            return coords

    # Built-in hash() of a str is salted per interpreter process, so it would
    # send the same phrase somewhere different on every run. CRC32 is stable.
    checksum = zlib.crc32(needle.encode("utf-8", errors="replace"))
    bearing = math.radians(checksum % 360)
    radius = 5.0
    return (radius * math.cos(bearing), radius * math.sin(bearing))


# A first-person "I" must not be the tail of another word. Under IGNORECASE a
# bare "I " would otherwise match inside e.g. "Kai leave the room". A
# lookbehind is used instead of \b so that markdown like "_I walk ..._" still
# matches ("_" counts as a word character for \b).
_FIRST_PERSON_I = r'(?<![A-Za-z])I '

# MULTILINE lets "$" end a capture at the end of any line, not only at the end
# of the whole text, so an action on a line without trailing punctuation is
# still recognised in multi-line output.
_LINE_FLAGS = re.IGNORECASE | re.MULTILINE

_QUOTED_RE = re.compile(r'"([^"]+)"')
_SPEECH_RE = re.compile(
    _FIRST_PERSON_I
    + r'(?:say|speak|reply|respond|tell \w+)\s*[:.]?\s*"?([^"]*)"?',
    re.IGNORECASE,
)
_MOVE_RE = re.compile(
    _FIRST_PERSON_I
    + r'(?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+'
    r'(?:the\s+)?(.+?)(?:\.|,|$)',
    _LINE_FLAGS,
)
_INTERACT_RE = re.compile(
    _FIRST_PERSON_I
    + r'(?:inspect|examine|touch|use|pick up|look at|investigate)\s+'
    r'(?:the\s+)?(.+?)(?:\.|,|$)',
    _LINE_FLAGS,
)
_BUILD_RE = re.compile(
    _FIRST_PERSON_I
    + r'(?:place|create|build|make|set down|leave)\s+'
    r'(?:a\s+|an\s+|the\s+)?(.+?)(?:\.|,|$)',
    _LINE_FLAGS,
)
_EMOTE_RE = re.compile(
    _FIRST_PERSON_I + r'(?:feel|am feeling|am)\s+([\w\s]+?)(?:\.|,|$)',
    _LINE_FLAGS,
)

# Mood cue words → agent state, checked in this order; the first group with a
# cue contained in the mood text wins.
_MOOD_STATES = (
    (("curious", "interested", "wonder"), "thinking"),
    (("busy", "working", "focused"), "processing"),
    (("calm", "peaceful", "content", "quiet"), "idle"),
    (("alert", "excited", "energized"), "processing"),
)


def _mood_to_state(mood: str) -> str:
    """Map a free-text mood to an agent state, defaulting to "idle"."""
    for cues, state in _MOOD_STATES:
        if any(cue in mood for cue in cues):
            return state
    return "idle"


def parse_actions(model_output: str, agent: str = "timmy") -> list[Action]:
    """Parse the model's natural-language response into structured actions.

    The model doesn't know it's generating actions — it just describes what
    it does, and intent is extracted from its language. At most one action of
    each kind is produced, in this order:

    * speak    — text after "I say/speak/reply/respond/tell X". If that
                 phrase captures nothing (as in ``"Hello," I say.``), or is
                 absent altogether, the longest double-quoted passage is used
                 instead, provided it is longer than 5 characters.
    * move     — "I walk/move/go/step/wander/head to/toward(s) <target>";
                 the target is resolved to coordinates.
    * interact — "I inspect/examine/touch/use/pick up/look at/investigate
                 <target>".
    * build    — "I place/create/build/make/set down/leave <object>".
    * emote    — "I feel / am feeling / am <mood>", mapped to an agent state.

    If nothing matches, a single "think" action carrying the first 200
    characters of the text is returned: thought without action is valid.

    Args:
        model_output: The model's raw text. None is treated as empty.
        agent: Name placed in the ``agent`` field of every outgoing message.

    Returns:
        A non-empty list of Action objects.
    """
    text = (model_output or "").strip()
    actions: list[Action] = []

    # --- Speech -----------------------------------------------------------
    explicit = _SPEECH_RE.search(text)
    spoken = explicit.group(1).strip().strip('"') if explicit else ""
    if spoken:
        actions.append(Action(
            action_type="speak",
            ws_message={"type": "chat_message", "text": spoken, "agent": agent},
            raw_text=explicit.group(0),
        ))
    else:
        # No usable "I say ..." text: fall back to the longest quoted
        # passage if it is long enough to look conversational.
        longest = max(_QUOTED_RE.findall(text), key=len, default="")
        if len(longest) > 5:
            actions.append(Action(
                action_type="speak",
                ws_message={"type": "chat_message", "text": longest, "agent": agent},
                raw_text=longest,
            ))

    # --- Movement ---------------------------------------------------------
    move = _MOVE_RE.search(text)
    if move:
        x, z = _resolve_position(move.group(1).strip())
        actions.append(Action(
            action_type="move",
            ws_message={"type": "agent_move", "agent": agent, "x": x, "z": z},
            raw_text=move.group(0),
        ))

    # --- Interaction ------------------------------------------------------
    interact = _INTERACT_RE.search(text)
    if interact:
        actions.append(Action(
            action_type="interact",
            ws_message={
                "type": "agent_interact",
                "agent": agent,
                "target": interact.group(1).strip(),
            },
            raw_text=interact.group(0),
        ))

    # --- Building ---------------------------------------------------------
    build = _BUILD_RE.search(text)
    if build:
        actions.append(Action(
            action_type="build",
            ws_message={
                "type": "scene_add",
                "agent": agent,
                "object": build.group(1).strip(),
            },
            raw_text=build.group(0),
        ))

    # --- Emotional state --------------------------------------------------
    emote = _EMOTE_RE.search(text)
    if emote:
        mood = emote.group(1).strip().lower()
        actions.append(Action(
            action_type="emote",
            ws_message={
                "type": "agent_state",
                "agent": agent,
                "state": _mood_to_state(mood),
                "mood": mood,
            },
            raw_text=emote.group(0),
        ))

    # --- Fallback: pure thought --------------------------------------------
    if not actions:
        actions.append(Action(
            action_type="think",
            ws_message={
                "type": "agent_state",
                "agent": agent,
                "state": "thinking",
                "thought": text[:200],
            },
            raw_text=text[:200],
        ))

    return actions


# ═══════════════════════════════════════════
# PERCEPTION BUFFER — collects events between think cycles
# ═══════════════════════════════════════════

class PerceptionBuffer:
    """Accumulates perceptions between think cycles, filtering by salience.

    The buffer holds at most ``max_size`` perceptions. When one more arrives,
    the least salient entries are evicted in a batch and the survivors stay
    in arrival order.
    """

    def __init__(self, max_size: int = 50):
        self.max_size = max_size
        self.buffer: list[Perception] = []

    def add(self, perception: Optional[Perception]) -> None:
        """Append *perception*; None is ignored.

        On overflow, the lowest-salience entries are dropped: about half of
        ``max_size`` at a time, so trimming is not repeated on every add, and
        always at least enough to get back within ``max_size``.
        """
        if perception is None:
            return
        self.buffer.append(perception)

        overflow = len(self.buffer) - self.max_size
        if overflow <= 0:
            return

        # Slicing with max_size // 2 alone would drop nothing when max_size
        # is 0 or 1, so the buffer would never shrink; hence the max().
        drop = max(overflow, self.max_size // 2)
        by_salience = sorted(
            range(len(self.buffer)),
            key=lambda i: self.buffer[i].salience,
        )
        doomed = set(by_salience[:drop])
        # Rebuild by position so survivors keep their arrival order.
        self.buffer = [p for i, p in enumerate(self.buffer) if i not in doomed]

    def flush(self) -> list[Perception]:
        """Return every buffered perception and empty the buffer."""
        drained = self.buffer
        self.buffer = []
        return drained

    def format_for_prompt(self) -> str:
        """Format buffered perceptions as natural language for the model.

        This flushes the buffer: calling it twice in a row yields the
        "nothing has happened" message the second time.
        """
        perceptions = self.flush()
        if not perceptions:
            return "Nothing has happened since your last thought."

        # Present events chronologically. No deduplication is performed:
        # repeated events appear as repeated lines.
        perceptions.sort(key=lambda p: p.timestamp)
        bullets = "\n".join(f"- {p.description}" for p in perceptions)
        return "Since your last thought, this happened:\n\n" + bullets

    def __len__(self) -> int:
        return len(self.buffer)