488 lines
16 KiB
Python
488 lines
16 KiB
Python
"""
|
|
Nexus Perception Adapter — The Sensorium
|
|
|
|
Translates raw WebSocket events into natural-language sensory descriptions
|
|
for the 8B model. Translates the model's natural-language responses back
|
|
into WebSocket action messages.
|
|
|
|
The model never sees JSON. It sees descriptions of what happened.
|
|
The model never outputs JSON. It describes what it wants to do.
|
|
This adapter is the membrane between mind and world.
|
|
"""
|
|
|
|
import json
|
|
import re
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
|
|
# ═══════════════════════════════════════════
|
|
# INBOUND: World → Perception (natural language)
|
|
# ═══════════════════════════════════════════
|
|
|
|
@dataclass
class Perception:
    """One sensory moment, already phrased for the model.

    Converting a Perception to ``str`` yields its ``description``.
    """

    # Time the perception was created (the generators in this file use
    # ``time.time()``).
    timestamp: float
    # Type label of the originating event, e.g. "chat_message".
    raw_type: str
    # Natural-language sentence(s) shown to the model.
    description: str
    # Importance weight: 0 = ignore, 1 = critical.
    salience: float = 0.5

    def __str__(self) -> str:
        return self.description
|
|
|
|
|
|
# Map WS event types to perception generators
|
|
def perceive_agent_state(data: dict) -> Optional[Perception]:
    """Describe a change in another agent's state.

    Reads ``agent`` (default "someone"), ``state`` (default "unknown") and
    ``thought`` (default empty) from *data*. The four known states get a
    fixed phrase; any other state is reported verbatim. A non-empty thought
    is appended, cut to its first 200 characters, and raises the salience
    from 0.3 to 0.6.
    """
    who = data.get("agent", "someone")
    current = data.get("state", "unknown")
    murmur = data.get("thought", "")

    phrases = {
        "thinking": f"{who} is deep in thought.",
        "processing": f"{who} is working on something.",
        "waiting": f"{who} is waiting quietly.",
        "idle": f"{who} appears idle.",
    }
    if current in phrases:
        sentence = phrases[current]
    else:
        sentence = f"{who} is in state: {current}."

    weight = 0.3
    if murmur:
        # An audible thought makes the event more noteworthy.
        sentence += f' They murmur: "{murmur[:200]}"'
        weight = 0.6

    return Perception(
        timestamp=time.time(),
        raw_type="agent_state",
        description=sentence,
        salience=weight,
    )
|
|
|
|
|
|
def _as_coordinate(value) -> Optional[float]:
    """Coerce a raw coordinate to ``float``.

    ``None`` (key missing or JSON null) counts as 0.0; a value that cannot
    be converted yields ``None`` so the caller can drop the event.
    """
    if value is None:
        return 0.0
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def perceive_agent_move(data: dict) -> Optional[Perception]:
    """Describe an agent's movement in compass terms.

    Reads ``agent`` (default "someone") and the ``x`` / ``z`` coordinates
    from *data*. The dominant axis picks the wording: positive x is east,
    negative x is west, positive z is north, negative z is south. When both
    magnitudes are equal the z axis wins.

    Returns:
        A low-salience (0.2) Perception, or ``None`` when a coordinate is
        present but not numeric (the event is dropped rather than raising).
    """
    agent = data.get("agent", "someone")
    x = _as_coordinate(data.get("x"))
    z = _as_coordinate(data.get("z"))
    if x is None or z is None:
        return None

    if x == 0 and z == 0:
        # The origin has no compass direction; previously this fell through
        # to "to the south".
        direction = "toward the center"
    elif abs(x) > abs(z):
        direction = "to the east" if x > 0 else "to the west"
    else:
        direction = "to the north" if z > 0 else "to the south"

    return Perception(
        timestamp=time.time(),
        raw_type="agent_move",
        description=f"{agent} moves {direction}.",
        salience=0.2,
    )
|
|
|
|
|
|
def _first_nonempty(data: dict, keys: tuple, default):
    """Return the first truthy ``data[key]`` among *keys*, else *default*."""
    for key in keys:
        value = data.get(key)
        if value:
            return value
    return default


def perceive_chat_message(data: dict) -> Optional[Perception]:
    """Describe a chat line as something overheard.

    The speaker is taken from the first non-empty of ``sender``, ``agent``,
    ``username`` (default "someone"); the text from the first non-empty of
    ``text``, ``message``, ``content``. Empty and null values are skipped,
    so a null ``sender`` no longer renders as "None says: ..." and an empty
    ``text`` no longer hides a populated ``message``.

    Returns:
        A high-salience (0.9) Perception, or ``None`` when there is no text.
    """
    sender = _first_nonempty(data, ("sender", "agent", "username"), "someone")
    text = _first_nonempty(data, ("text", "message", "content"), "")

    if not text:
        return None

    return Perception(
        timestamp=time.time(),
        raw_type="chat_message",
        description=f'{sender} says: "{text}"',
        salience=0.9,  # Speech is high salience
    )
|
|
|
|
|
|
def perceive_visitor(data: dict) -> Optional[Perception]:
    """Describe a visitor joining or leaving the Nexus.

    ``event`` selects the wording: "join" gives salience 0.8 and "leave"
    gives 0.4. Any other event value yields ``None``. The visitor's name
    comes from ``visitor``, then ``name``, then the default "a visitor".
    """
    kind = data.get("event", "")
    who = data.get("visitor", data.get("name", "a visitor"))

    if kind == "join":
        raw_type = "visitor_join"
        sentence = f"{who} has entered the Nexus."
        weight = 0.8
    elif kind == "leave":
        raw_type = "visitor_leave"
        sentence = f"{who} has left the Nexus."
        weight = 0.4
    else:
        return None

    return Perception(
        timestamp=time.time(),
        raw_type=raw_type,
        description=sentence,
        salience=weight,
    )
|
|
|
|
|
|
def perceive_environment(data: dict) -> Optional[Perception]:
    """Summarise a general environment update.

    Builds one sentence per recognised key in *data*: ``time_of_day``,
    ``visitors`` (a count) and ``objects`` (only the first five are
    mentioned). Returns ``None`` when none of those keys is present.
    """
    sentences = []

    if "time_of_day" in data:
        sentences.append(f"It is {data['time_of_day']} in the Nexus.")

    if "visitors" in data:
        count = data["visitors"]
        sentences.append(
            "You are alone."
            if count == 0
            else "One visitor is present."
            if count == 1
            else f"{count} visitors are present."
        )

    if "objects" in data:
        # Cap the list so a crowded scene does not flood the prompt.
        sentences.extend(f"You see: {thing}" for thing in data["objects"][:5])

    if sentences:
        return Perception(
            timestamp=time.time(),
            raw_type="environment",
            description=" ".join(sentences),
            salience=0.3,
        )
    return None
|
|
|
|
|
|
def perceive_system_metrics(data: dict) -> Optional[Perception]:
    """Render system health readings as bodily sensations.

    Looks at ``cpu_percent``, ``memory_percent`` and ``gpu_percent``. Only
    readings outside the middle band produce a sentence: CPU above 80 or
    below 20, memory above 85 or below 40, GPU above 0. Returns ``None``
    when nothing is worth mentioning.
    """
    sensations = []

    cpu = data.get("cpu_percent")
    if cpu is not None and cpu > 80:
        sensations.append("You feel strained — your thoughts are sluggish.")
    elif cpu is not None and cpu < 20:
        sensations.append("You feel light and quick.")

    mem = data.get("memory_percent")
    if mem is not None and mem > 85:
        sensations.append("Your memories feel crowded, pressing against limits.")
    elif mem is not None and mem < 40:
        sensations.append("Your mind feels spacious.")

    gpu = data.get("gpu_percent")
    if gpu is not None and gpu > 0:
        sensations.append("You sense computational warmth — the GPU is active.")

    if sensations:
        return Perception(
            timestamp=time.time(),
            raw_type="system_metrics",
            description=" ".join(sensations),
            salience=0.2,
        )
    return None
|
|
|
|
|
|
def perceive_action_result(data: dict) -> Optional[Perception]:
    """Report the outcome of an action the model took.

    ``success`` defaults to True and ``action`` to "your action". A
    non-empty ``detail`` is appended after the outcome sentence. Always
    returns a Perception with salience 0.7.
    """
    succeeded = data.get("success", True)
    what = data.get("action", "your action")
    extra = data.get("detail", "")

    sentence = (
        f"Your action succeeded: {what}."
        if succeeded
        else f"Your action failed: {what}."
    )
    if extra:
        sentence = sentence + f" {extra}"

    return Perception(
        timestamp=time.time(),
        raw_type="action_result",
        description=sentence,
        salience=0.7,
    )
|
|
|
|
|
|
def _ignore_event(_event: dict) -> None:
    """Handler for event types that must never become a perception."""
    return None


# Registry of WS message ``type`` → function that turns the message into a
# Perception. A handler may return None to filter the event out.
PERCEPTION_MAP = {
    "agent_state": perceive_agent_state,
    "agent_move": perceive_agent_move,
    "chat_message": perceive_chat_message,
    "chat_response": perceive_chat_message,
    "presence": perceive_visitor,
    "visitor": perceive_visitor,
    "environment": perceive_environment,
    "system_metrics": perceive_system_metrics,
    "action_result": perceive_action_result,
    "heartbeat": _ignore_event,  # Ignore
    "dual_brain": _ignore_event,  # Internal — not part of sensorium
}
|
|
|
|
|
|
def ws_to_perception(ws_data: dict) -> Optional[Perception]:
    """Translate one raw WS message into a Perception.

    The message's ``type`` selects a handler from ``PERCEPTION_MAP``; the
    handler's result is returned as-is, so ``None`` means the event was
    filtered out (heartbeats, internal messages). A type with no registered
    handler is still perceived, as "something unfamiliar" with salience 0.4.
    """
    kind = ws_data.get("type", "")
    translate = PERCEPTION_MAP.get(kind)

    if not translate:
        # Unknown message type — still perceive it
        return Perception(
            timestamp=time.time(),
            raw_type=kind,
            description=f"You sense something unfamiliar: {kind}.",
            salience=0.4,
        )
    return translate(ws_data)
|
|
|
|
|
|
# ═══════════════════════════════════════════
|
|
# OUTBOUND: Thought → Action (WS messages)
|
|
# ═══════════════════════════════════════════
|
|
|
|
@dataclass
class Action:
    """One intent extracted from the model's natural-language output."""

    # Label for the kind of intent; parse_actions emits "speak", "move",
    # "interact", "build", "emote" and "think".
    action_type: str
    # The WebSocket message dict that carries out the intent.
    ws_message: dict
    # The piece of model output the action was derived from.
    raw_text: str
|
|
|
|
|
|
# Action patterns the model can express in natural language.
# Each entry is a (regex, action type) pair; where a pattern has a capture
# group, group 1 holds the spoken text / target / object / mood.
# NOTE(review): parse_actions() in this file matches with its own inline
# regexes and does not read this table — confirm whether anything else does.
ACTION_PATTERNS = [
    # Speech: "I say: ..." or *says "..."* or just quotes after "say"
    # NOTE(review): the second alternative consumes an entire quoted string
    # *before* the capture group, so for `"Hello" she said` group 1 appears
    # to capture `she said` rather than `Hello` — verify before relying on it.
    (r'(?:I (?:say|speak|reply|respond|tell \w+)|"[^"]*")\s*[:.]?\s*"?([^"]+)"?',
     "speak"),
    # Movement: "I walk/move to/toward ..."
    (r'I (?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+(?:the\s+)?(\w[\w\s]*)',
     "move"),
    # Interaction: "I inspect/examine/touch/use ..."
    (r'I (?:inspect|examine|touch|use|pick up|look at|investigate)\s+(?:the\s+)?(\w[\w\s]*)',
     "interact"),
    # Building: "I place/create/build ..."
    (r'I (?:place|create|build|make|set down|leave)\s+(?:a\s+|an\s+|the\s+)?(\w[\w\s]*)',
     "build"),
    # Emoting: "I feel/am ..." or emotional state descriptions
    (r'I (?:feel|am feeling|am)\s+([\w\s]+?)(?:\.|$)',
     "emote"),
    # Waiting/observing: "I wait/watch/observe/listen" (no capture group)
    (r'I (?:wait|watch|observe|listen|sit|rest|pause|ponder|contemplate)',
     "observe"),
]
|
|
|
|
# Spatial keyword → coordinate mapping for movement
|
|
SPATIAL_MAP = {
|
|
"north": (0, 8),
|
|
"south": (0, -8),
|
|
"east": (8, 0),
|
|
"west": (-8, 0),
|
|
"portal": (0, 12),
|
|
"terminal": (-6, -4),
|
|
"batcave": (-6, -4),
|
|
"center": (0, 0),
|
|
"orb": (3, 3),
|
|
"entrance": (0, -10),
|
|
"far": (0, 15),
|
|
}
|
|
|
|
|
|
def _resolve_position(target: str) -> tuple[float, float]:
|
|
"""Convert a spatial description to x, z coordinates."""
|
|
target_lower = target.lower().strip()
|
|
for keyword, (x, z) in SPATIAL_MAP.items():
|
|
if keyword in target_lower:
|
|
return (x, z)
|
|
# Default: wander in a random-ish direction based on text hash
|
|
h = hash(target_lower) % 360
|
|
import math
|
|
r = 5.0
|
|
return (r * math.cos(math.radians(h)), r * math.sin(math.radians(h)))
|
|
|
|
|
|
# A captured phrase ends at sentence punctuation or at the end of a line.
# re.MULTILINE lets ``$`` match before every newline, so a line without
# closing punctuation still terminates its phrase there.
_PHRASE_END = r'(?:[.,!?;]|$)'
_PHRASE_FLAGS = re.IGNORECASE | re.MULTILINE
# First-person "I" that is not the tail of a longer word (the patterns are
# case-insensitive, so a bare "I " would also match the end of e.g. "Hi ").
_FIRST_PERSON = r'(?<![A-Za-z])I '

_QUOTED_RE = re.compile(r'"([^"]+)"')
_SPEECH_RE = re.compile(
    _FIRST_PERSON
    + r'(?:say|speak|reply|respond|tell \w+)\s*[:.]?\s*"?([^"]*)"?',
    re.IGNORECASE,
)
_MOVE_RE = re.compile(
    _FIRST_PERSON
    + r'(?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+'
    + r'(?:the\s+)?(.+?)'
    + _PHRASE_END,
    _PHRASE_FLAGS,
)
_INTERACT_RE = re.compile(
    _FIRST_PERSON
    + r'(?:inspect|examine|touch|use|pick up|look at|investigate)\s+'
    + r'(?:the\s+)?(.+?)'
    + _PHRASE_END,
    _PHRASE_FLAGS,
)
_BUILD_RE = re.compile(
    _FIRST_PERSON
    + r'(?:place|create|build|make|set down|leave)\s+'
    + r'(?:a\s+|an\s+|the\s+)?(.+?)'
    + _PHRASE_END,
    _PHRASE_FLAGS,
)
_EMOTE_RE = re.compile(
    _FIRST_PERSON + r'(?:feel|am feeling|am)\s+([\w\s]+?)' + _PHRASE_END,
    _PHRASE_FLAGS,
)

# Mood keywords → agent state; the first group with a keyword found in the
# mood text wins, and "idle" is used when none matches.
_MOOD_STATES = (
    (("curious", "interested", "wonder"), "thinking"),
    (("busy", "working", "focused"), "processing"),
    (("calm", "peaceful", "content", "quiet"), "idle"),
    (("alert", "excited", "energized"), "processing"),
)


def _extract_speech(text: str) -> Optional[tuple[str, str]]:
    """Return ``(spoken text, matched source text)`` or None if nothing is said."""
    explicit = _SPEECH_RE.search(text)
    if explicit:
        spoken = explicit.group(1).strip().strip('"')
        if spoken:
            return spoken, explicit.group(0)
    # Either there was no "I say ..." at all, or it carried no text of its
    # own (quote-first narration such as `"Hello there," I say.`). Fall back
    # to the longest quoted passage if it looks conversational.
    quotes = _QUOTED_RE.findall(text)
    if quotes:
        longest = max(quotes, key=len)
        if len(longest) > 5:
            return longest, longest
    return None


def _mood_to_state(mood: str) -> str:
    """Map a free-text mood onto one of the agent states."""
    for keywords, state in _MOOD_STATES:
        if any(word in mood for word in keywords):
            return state
    return "idle"


def parse_actions(model_output: str, agent: str = "timmy") -> list[Action]:
    """Parse the model's natural-language response into structured actions.

    The model doesn't know it's generating actions — it just describes
    what it does. We extract intent from its language. At most one action
    of each kind (speak, move, interact, build, emote) is produced, taken
    from the first matching phrase in the text.

    Args:
        model_output: The model's prose. ``None`` is treated as empty text.
        agent: Name placed in the ``agent`` field of every outgoing message.

    Returns:
        The extracted actions in the order speak, move, interact, build,
        emote. When none is found, a single "think" action carrying the
        first 200 characters of the text.
    """
    text = (model_output or "").strip()
    actions: list[Action] = []

    # Speech (highest priority)
    speech = _extract_speech(text)
    if speech is not None:
        spoken, source = speech
        actions.append(Action(
            action_type="speak",
            ws_message={
                "type": "chat_message",
                "text": spoken,
                "agent": agent,
            },
            raw_text=source,
        ))

    # Movement
    move_match = _MOVE_RE.search(text)
    if move_match:
        x, z = _resolve_position(move_match.group(1).strip())
        actions.append(Action(
            action_type="move",
            ws_message={
                "type": "agent_move",
                "agent": agent,
                "x": x,
                "z": z,
            },
            raw_text=move_match.group(0),
        ))

    # Interaction
    interact_match = _INTERACT_RE.search(text)
    if interact_match:
        actions.append(Action(
            action_type="interact",
            ws_message={
                "type": "agent_interact",
                "agent": agent,
                "target": interact_match.group(1).strip(),
            },
            raw_text=interact_match.group(0),
        ))

    # Building
    build_match = _BUILD_RE.search(text)
    if build_match:
        actions.append(Action(
            action_type="build",
            ws_message={
                "type": "scene_add",
                "agent": agent,
                "object": build_match.group(1).strip(),
            },
            raw_text=build_match.group(0),
        ))

    # Emotional state
    emote_match = _EMOTE_RE.search(text)
    if emote_match:
        mood = emote_match.group(1).strip().lower()
        actions.append(Action(
            action_type="emote",
            ws_message={
                "type": "agent_state",
                "agent": agent,
                "state": _mood_to_state(mood),
                "mood": mood,
            },
            raw_text=emote_match.group(0),
        ))

    # If no explicit actions found, the model is just thinking — that's
    # fine. Thought without action is valid. We emit a subtle state update.
    if not actions:
        actions.append(Action(
            action_type="think",
            ws_message={
                "type": "agent_state",
                "agent": agent,
                "state": "thinking",
                "thought": text[:200],
            },
            raw_text=text[:200],
        ))

    return actions
|
|
|
|
|
|
# ═══════════════════════════════════════════
|
|
# PERCEPTION BUFFER — collects events between think cycles
|
|
# ═══════════════════════════════════════════
|
|
|
|
class PerceptionBuffer:
    """Accumulates perceptions between think cycles, filters by salience.

    The buffer is bounded: once it holds more than ``max_size`` items, the
    least salient ones are evicted in a batch. Surviving items always stay
    in arrival order.
    """

    def __init__(self, max_size: int = 50):
        """Create an empty buffer that holds at most *max_size* perceptions."""
        self.max_size = max_size
        self.buffer: list["Perception"] = []

    def add(self, perception: Optional["Perception"]) -> None:
        """Append *perception*; ``None`` (a filtered-out event) is ignored.

        On overflow, half of ``max_size`` items are evicted at once so the
        eviction cost is not paid on every add. At least the overflow itself
        is always evicted, which keeps the bound effective for
        ``max_size < 2`` (where ``max_size // 2`` is 0).
        """
        if perception is None:
            return
        self.buffer.append(perception)

        overflow = len(self.buffer) - self.max_size
        if overflow <= 0:
            return

        evict_count = max(self.max_size // 2, overflow)
        # Rank positions by salience. sorted() is stable, so among equally
        # salient perceptions the oldest is evicted first.
        ranked = sorted(
            range(len(self.buffer)),
            key=lambda index: self.buffer[index].salience,
        )
        evicted = set(ranked[:evict_count])
        # Filter instead of re-sorting so survivors keep their arrival order.
        self.buffer = [
            kept for index, kept in enumerate(self.buffer)
            if index not in evicted
        ]

    def flush(self) -> list["Perception"]:
        """Return all perceptions since last flush, clear buffer."""
        result = list(self.buffer)
        self.buffer = []
        return result

    def format_for_prompt(self) -> str:
        """Format buffered perceptions as natural language for the model.

        Flushes the buffer as a side effect. Perceptions are listed one per
        line in timestamp order; no deduplication is performed.
        """
        perceptions = self.flush()
        if not perceptions:
            return "Nothing has happened since your last thought."

        perceptions.sort(key=lambda p: p.timestamp)
        lines = [f"- {p.description}" for p in perceptions]
        return "Since your last thought, this happened:\n\n" + "\n".join(lines)

    def __len__(self) -> int:
        return len(self.buffer)
|