Files
the-nexus/nexus/perception_adapter.py
Alexander Whitestone 8ead4cd13f
Some checks failed
CI / validate (pull_request) Failing after 4s
feat: add thin Evennia to Nexus event adapter
2026-03-28 16:02:27 -04:00

541 lines
17 KiB
Python

"""
Nexus Perception Adapter — The Sensorium
Translates raw WebSocket events into natural-language sensory descriptions
for the 8B model. Translates the model's natural-language responses back
into WebSocket action messages.
The model never sees JSON. It sees descriptions of what happened.
The model never outputs JSON. It describes what it wants to do.
This adapter is the membrane between mind and world.
"""
import json
import re
import time
from dataclasses import dataclass, field
from typing import Optional
# ═══════════════════════════════════════════
# INBOUND: World → Perception (natural language)
# ═══════════════════════════════════════════
@dataclass
class Perception:
    """One sensory event, already rendered as natural language.

    ``description`` is the text handed to the model; ``raw_type`` records
    which kind of event the perception was derived from.
    """

    timestamp: float  # creation time (callers in this module pass time.time())
    raw_type: str  # identifier of the originating event type
    description: str  # natural-language rendering of the event
    salience: float = 0.5  # 0=ignore, 1=critical

    def __str__(self) -> str:
        """Render the perception as its description text."""
        return self.description
# Map WS event types to perception generators
def perceive_agent_state(data: dict) -> Optional[Perception]:
    """Describe another agent's state change as a sentence.

    Reads ``agent``, ``state`` and ``thought`` from ``data``. The four known
    states map to a fixed phrase; any other state is reported verbatim. A
    non-empty thought is appended (cut to 200 characters) and lifts the
    salience from 0.3 to 0.6.
    """
    who = data.get("agent", "someone")
    status = data.get("state", "unknown")
    murmur = data.get("thought", "")

    # Only the part after the agent's name differs between known states.
    known_phrases = {
        "thinking": "is deep in thought.",
        "processing": "is working on something.",
        "waiting": "is waiting quietly.",
        "idle": "appears idle.",
    }
    phrase = known_phrases.get(status)
    if phrase is None:
        sentence = f"{who} is in state: {status}."
    else:
        sentence = f"{who} {phrase}"

    if murmur:
        sentence += f' They murmur: "{murmur[:200]}"'
        weight = 0.6
    else:
        weight = 0.3

    return Perception(
        timestamp=time.time(),
        raw_type="agent_state",
        description=sentence,
        salience=weight,
    )
def perceive_agent_move(data: dict) -> Optional[Perception]:
    """Describe an agent's movement in compass terms.

    ``x`` and ``z`` are read from ``data``. The axis with the larger
    magnitude picks the direction (positive x is east, positive z is north;
    ties go to the z axis). A position of exactly (0, 0) is reported as the
    center instead of being mislabelled as "south".

    Missing, null or non-numeric coordinates are treated as 0 so that a
    malformed event cannot raise out of the perception pipeline.
    """

    def _coordinate(value) -> float:
        """Coerce a payload value to float, falling back to 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    agent = data.get("agent", "someone")
    x = _coordinate(data.get("x", 0))
    z = _coordinate(data.get("z", 0))

    # Translate coordinates to spatial language
    if x == 0 and z == 0:
        direction = "to the center"
    elif abs(x) > abs(z):
        direction = "to the east" if x > 0 else "to the west"
    else:
        direction = "to the north" if z > 0 else "to the south"

    return Perception(
        timestamp=time.time(),
        raw_type="agent_move",
        description=f"{agent} moves {direction}.",
        salience=0.2,
    )
def perceive_chat_message(data: dict) -> Optional[Perception]:
    """Describe a chat line as ``<sender> says: "<text>"``.

    The speaker is the first of ``sender`` / ``agent`` / ``username`` present
    in ``data`` (default "someone"); the text is the first of ``text`` /
    ``message`` / ``content`` present (default empty). Presence of a key
    wins even when its value is empty. Returns None when the text is empty.
    """
    speaker = next(
        (data[key] for key in ("sender", "agent", "username") if key in data),
        "someone",
    )
    utterance = next(
        (data[key] for key in ("text", "message", "content") if key in data),
        "",
    )
    if utterance:
        return Perception(
            timestamp=time.time(),
            raw_type="chat_message",
            description=f'{speaker} says: "{utterance}"',
            salience=0.9,  # Speech is high salience
        )
    return None
def perceive_visitor(data: dict) -> Optional[Perception]:
    """Describe a visitor joining or leaving the Nexus.

    ``event`` must be "join" or "leave"; anything else yields None. The
    visitor's name comes from ``visitor``, then ``name``, then defaults to
    "a visitor". Arrivals are more salient (0.8) than departures (0.4).
    """
    kind = data.get("event", "")
    who = data.get("visitor", data.get("name", "a visitor"))

    if kind == "join":
        raw_type = "visitor_join"
        sentence = f"{who} has entered the Nexus."
        weight = 0.8
    elif kind == "leave":
        raw_type = "visitor_leave"
        sentence = f"{who} has left the Nexus."
        weight = 0.4
    else:
        return None

    return Perception(
        timestamp=time.time(),
        raw_type=raw_type,
        description=sentence,
        salience=weight,
    )
def perceive_environment(data: dict) -> Optional[Perception]:
    """Summarise a general environment update.

    Builds up to three kinds of sentences, in this order: the time of day,
    how many visitors are present, and one "You see" line for each of the
    first five entries of ``objects``. Returns None when none of those keys
    contributes a sentence.
    """
    sentences = []

    if "time_of_day" in data:
        sentences.append(f"It is {data['time_of_day']} in the Nexus.")

    if "visitors" in data:
        headcount = data["visitors"]
        if headcount == 0:
            sentences.append("You are alone.")
        elif headcount == 1:
            sentences.append("One visitor is present.")
        else:
            sentences.append(f"{headcount} visitors are present.")

    if "objects" in data:
        # Cap the listing at five objects.
        sentences.extend(f"You see: {item}" for item in data["objects"][:5])

    if sentences:
        return Perception(
            timestamp=time.time(),
            raw_type="environment",
            description=" ".join(sentences),
            salience=0.3,
        )
    return None
def perceive_system_metrics(data: dict) -> Optional[Perception]:
    """Render system health readings as bodily sensations.

    ``cpu_percent`` and ``memory_percent`` each produce a sentence only when
    strictly above their high mark or strictly below their low mark;
    ``gpu_percent`` produces one when it is greater than zero. Returns None
    when no reading is remarkable.
    """
    # (payload key, high mark, text above it, low mark, text below it)
    thresholds = (
        (
            "cpu_percent",
            80,
            "You feel strained — your thoughts are sluggish.",
            20,
            "You feel light and quick.",
        ),
        (
            "memory_percent",
            85,
            "Your memories feel crowded, pressing against limits.",
            40,
            "Your mind feels spacious.",
        ),
    )

    sensations = []
    for key, high, high_text, low, low_text in thresholds:
        reading = data.get(key)
        if reading is None:
            continue
        if reading > high:
            sensations.append(high_text)
        elif reading < low:
            sensations.append(low_text)

    gpu_load = data.get("gpu_percent")
    if gpu_load is not None and gpu_load > 0:
        sensations.append("You sense computational warmth — the GPU is active.")

    if sensations:
        return Perception(
            timestamp=time.time(),
            raw_type="system_metrics",
            description=" ".join(sensations),
            salience=0.2,
        )
    return None
def perceive_action_result(data: dict) -> Optional[Perception]:
    """Report the outcome of an action the model took earlier.

    ``success`` (default True) selects the "succeeded" or "failed" wording,
    ``action`` names what was attempted, and a non-empty ``detail`` is
    appended after a space.
    """
    succeeded = data.get("success", True)
    what = data.get("action", "your action")
    extra = data.get("detail", "")

    sentence = (
        f"Your action succeeded: {what}."
        if succeeded
        else f"Your action failed: {what}."
    )
    if extra:
        sentence = f"{sentence} {extra}"

    return Perception(
        timestamp=time.time(),
        raw_type="action_result",
        description=sentence,
        salience=0.7,
    )
def perceive_evennia_actor_located(data: dict) -> Optional[Perception]:
    """Announce which room an Evennia actor is now in.

    The room label is the first truthy value among ``room_name``,
    ``room_key`` and ``room_id``; with none of them set the event is dropped
    (None). ``actor_id`` defaults to "Timmy".
    """
    who = data.get("actor_id", "Timmy")

    for key in ("room_name", "room_key", "room_id"):
        place = data.get(key)
        if place:
            break
    else:
        # No usable room label: nothing meaningful to report.
        return None

    return Perception(
        timestamp=time.time(),
        raw_type="evennia.actor_located",
        description=f"{who} is now in {place}.",
        salience=0.7,
    )
def perceive_evennia_room_snapshot(data: dict) -> Optional[Perception]:
    """Describe an Evennia room: title, description, exits and objects.

    The title is the first truthy value among ``title``, ``room_key`` and
    ``room_id``; without one the snapshot is dropped (None). ``exits`` and
    ``objects`` are expected to be lists of dicts with a ``key`` entry;
    entries without a truthy key are skipped, and a missing or null list is
    treated as empty.
    """
    title = data.get("title") or data.get("room_key") or data.get("room_id")
    # Check the title first so a title-less snapshot never touches the lists.
    if not title:
        return None

    def _joined_keys(entries) -> str:
        """Comma-join the ``key`` of every entry that has a truthy one."""
        return ", ".join(
            entry.get("key", "") for entry in (entries or []) if entry.get("key")
        )

    desc = data.get("desc", "")
    exits = _joined_keys(data.get("exits"))
    objects = _joined_keys(data.get("objects"))

    parts = [f"You are in {title}."]
    if desc:
        parts.append(desc)
    if exits:
        parts.append(f"Exits: {exits}.")
    if objects:
        parts.append(f"You see: {objects}.")

    return Perception(
        timestamp=time.time(),
        raw_type="evennia.room_snapshot",
        description=" ".join(parts),
        salience=0.85,
    )
def perceive_evennia_command_result(data: dict) -> Optional[Perception]:
    """Report whether a world command succeeded, with a slice of its output.

    ``success`` defaults to True and ``command_text`` to "your command". A
    non-empty ``output_text`` is appended, cut to 240 characters.
    """
    succeeded = data.get("success", True)
    command = data.get("command_text", "your command")
    output = data.get("output_text", "")

    verdict = "succeeded" if succeeded else "failed"
    sentence = f"Your world command {verdict}: {command}."
    if output:
        sentence = f"{sentence} {output[:240]}"

    return Perception(
        timestamp=time.time(),
        raw_type="evennia.command_result",
        description=sentence,
        salience=0.8,
    )
# Registry of WS type → perception function
def _ignore_event(_event: dict) -> None:
    """Handler for event types that are deliberately not perceived."""
    return None


PERCEPTION_MAP = {
    # Agents and conversation
    "agent_state": perceive_agent_state,
    "agent_move": perceive_agent_move,
    "chat_message": perceive_chat_message,
    "chat_response": perceive_chat_message,
    # Visitors (two event names share one handler)
    "presence": perceive_visitor,
    "visitor": perceive_visitor,
    # World and self
    "environment": perceive_environment,
    "system_metrics": perceive_system_metrics,
    "action_result": perceive_action_result,
    # Filtered out entirely
    "heartbeat": _ignore_event,  # Ignore
    "dual_brain": _ignore_event,  # Internal — not part of sensorium
    # Evennia world events
    "evennia.actor_located": perceive_evennia_actor_located,
    "evennia.room_snapshot": perceive_evennia_room_snapshot,
    "evennia.command_result": perceive_evennia_command_result,
}
def ws_to_perception(ws_data: dict) -> Optional[Perception]:
    """Turn a raw WS message into a perception.

    The message's ``type`` selects a handler from ``PERCEPTION_MAP`` and the
    handler's result is returned as-is; that result is None for events that
    are filtered out (heartbeats, internal messages). A type with no
    registered handler is still perceived, as a generic "unfamiliar"
    sensation.
    """
    kind = ws_data.get("type", "")
    perceive = PERCEPTION_MAP.get(kind)

    if not perceive:
        # Unknown message type — still perceive it
        return Perception(
            timestamp=time.time(),
            raw_type=kind,
            description=f"You sense something unfamiliar: {kind}.",
            salience=0.4,
        )
    return perceive(ws_data)
# ═══════════════════════════════════════════
# OUTBOUND: Thought → Action (WS messages)
# ═══════════════════════════════════════════
@dataclass
class Action:
    """One action extracted from the model's natural-language output."""

    action_type: str  # category label, e.g. "speak" or "move"
    ws_message: dict  # WebSocket payload representing the action
    raw_text: str  # the piece of model output the action was derived from
# Action patterns the model can express in natural language
# Each entry pairs a regular expression with the action type it signals.
# NOTE(review): nothing in the portion of the file reviewed reads this table;
# confirm whether it is still used before changing it.
ACTION_PATTERNS = [
    (
        # Speech: "I say: ..." or *says "..."* or just quotes after "say"
        r'(?:I (?:say|speak|reply|respond|tell \w+)|"[^"]*")\s*[:.]?\s*"?([^"]+)"?',
        "speak",
    ),
    (
        # Movement: "I walk/move to/toward ..."
        r'I (?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+(?:the\s+)?(\w[\w\s]*)',
        "move",
    ),
    (
        # Interaction: "I inspect/examine/touch/use ..."
        r'I (?:inspect|examine|touch|use|pick up|look at|investigate)\s+(?:the\s+)?(\w[\w\s]*)',
        "interact",
    ),
    (
        # Building: "I place/create/build ..."
        r'I (?:place|create|build|make|set down|leave)\s+(?:a\s+|an\s+|the\s+)?(\w[\w\s]*)',
        "build",
    ),
    (
        # Emoting: "I feel/am ..." or emotional state descriptions
        r'I (?:feel|am feeling|am)\s+([\w\s]+?)(?:\.|$)',
        "emote",
    ),
    (
        # Waiting/observing: "I wait/watch/observe/listen"
        r'I (?:wait|watch|observe|listen|sit|rest|pause|ponder|contemplate)',
        "observe",
    ),
]
# Spatial keyword → coordinate mapping for movement
# Keyword -> (x, z) target. Insertion order is kept as in the original
# literal, in case lookups depend on which keyword is tried first.
SPATIAL_MAP = dict(
    # Compass directions
    north=(0, 8),
    south=(0, -8),
    east=(8, 0),
    west=(-8, 0),
    # Named places ("terminal" and "batcave" share a location)
    portal=(0, 12),
    terminal=(-6, -4),
    batcave=(-6, -4),
    center=(0, 0),
    orb=(3, 3),
    entrance=(0, -10),
    far=(0, 15),
)
def _resolve_position(target: str) -> tuple[float, float]:
    """Convert a spatial description to x, z coordinates.

    The first ``SPATIAL_MAP`` keyword (in the map's insertion order) that
    occurs as a substring of the lower-cased, stripped target decides the
    position. With no keyword match the target is placed on a circle of
    radius 5 around the origin, at an angle derived from a checksum of the
    text.

    The angle uses ``zlib.crc32`` rather than the built-in ``hash()``:
    string hashes are salted per interpreter process, so ``hash()`` would
    send the same unknown target to a different spot on every run.
    """
    import math
    import zlib

    target_lower = target.lower().strip()
    for keyword, (x, z) in SPATIAL_MAP.items():
        if keyword in target_lower:
            return (x, z)

    # Default: wander in a pseudo-random but repeatable direction.
    degrees = zlib.crc32(target_lower.encode("utf-8")) % 360
    angle = math.radians(degrees)
    radius = 5.0
    return (radius * math.cos(angle), radius * math.sin(angle))
# Patterns used by parse_actions. "\bI" keeps the pronoun from matching the
# tail of another word under IGNORECASE (e.g. "Kai go to ..."), and MULTILINE
# lets "$" end a target at a line break as well as at the end of the text.
_TARGET_FLAGS = re.IGNORECASE | re.MULTILINE
_QUOTE_RE = re.compile(r'"([^"]+)"')
_SPEECH_RE = re.compile(
    r'\bI (?:say|speak|reply|respond|tell \w+)\s*[:.]?\s*"?([^"]*)"?',
    re.IGNORECASE,
)
_MOVE_RE = re.compile(
    r'\bI (?:walk|move|go|step|wander|head)\s+(?:to(?:ward)?|towards?)\s+'
    r'(?:the\s+)?(.+?)(?:\.|,|$)',
    _TARGET_FLAGS,
)
_INTERACT_RE = re.compile(
    r'\bI (?:inspect|examine|touch|use|pick up|look at|investigate)\s+'
    r'(?:the\s+)?(.+?)(?:\.|,|$)',
    _TARGET_FLAGS,
)
_BUILD_RE = re.compile(
    r'\bI (?:place|create|build|make|set down|leave)\s+'
    r'(?:a\s+|an\s+|the\s+)?(.+?)(?:\.|,|$)',
    _TARGET_FLAGS,
)
_EMOTE_RE = re.compile(
    r'\bI (?:feel|am feeling|am)\s+([\w\s]+?)(?:\.|,|$)',
    _TARGET_FLAGS,
)
# Mood keywords -> agent state, checked in order; no hit means "idle".
_MOOD_STATES = (
    (("curious", "interested", "wonder"), "thinking"),
    (("busy", "working", "focused"), "processing"),
    (("calm", "peaceful", "content", "quiet"), "idle"),
    (("alert", "excited", "energized"), "processing"),
)


def parse_actions(model_output: str) -> list[Action]:
    """Parse the model's natural-language response into structured actions.

    The model doesn't know it's generating actions — it just describes
    what it does. We extract intent from its language.

    At most one action of each kind is produced, in this order: speak, move,
    interact, build, emote. When nothing matches, a single "think" action
    carrying the first 200 characters of the text is returned, so the result
    is never empty. ``None`` input is treated as empty text.
    """
    text = (model_output or "").strip()
    actions: list[Action] = []

    # Speech: prefer an explicit "I say ..." statement; otherwise (or when
    # that statement carries no words, as in '"Hello there" I say.') fall
    # back to the longest quoted passage if it is longer than 5 characters.
    spoken = ""
    spoken_raw = ""
    speech_match = _SPEECH_RE.search(text)
    if speech_match:
        spoken = speech_match.group(1).strip().strip('"')
        spoken_raw = speech_match.group(0)
    if not spoken:
        longest = max(_QUOTE_RE.findall(text), key=len, default="")
        if len(longest) > 5:
            spoken = longest
            spoken_raw = longest
    if spoken:
        actions.append(Action(
            action_type="speak",
            ws_message={
                "type": "chat_message",
                "text": spoken,
                "agent": "timmy",
            },
            raw_text=spoken_raw,
        ))

    # Movement
    move_match = _MOVE_RE.search(text)
    if move_match:
        x, z = _resolve_position(move_match.group(1).strip())
        actions.append(Action(
            action_type="move",
            ws_message={
                "type": "agent_move",
                "agent": "timmy",
                "x": x,
                "z": z,
            },
            raw_text=move_match.group(0),
        ))

    # Interaction
    interact_match = _INTERACT_RE.search(text)
    if interact_match:
        actions.append(Action(
            action_type="interact",
            ws_message={
                "type": "agent_interact",
                "agent": "timmy",
                "target": interact_match.group(1).strip(),
            },
            raw_text=interact_match.group(0),
        ))

    # Building
    build_match = _BUILD_RE.search(text)
    if build_match:
        actions.append(Action(
            action_type="build",
            ws_message={
                "type": "scene_add",
                "agent": "timmy",
                "object": build_match.group(1).strip(),
            },
            raw_text=build_match.group(0),
        ))

    # Emotional state
    emote_match = _EMOTE_RE.search(text)
    if emote_match:
        mood = emote_match.group(1).strip().lower()
        # Map moods to agent states
        state = next(
            (
                mapped
                for words, mapped in _MOOD_STATES
                if any(word in mood for word in words)
            ),
            "idle",
        )
        actions.append(Action(
            action_type="emote",
            ws_message={
                "type": "agent_state",
                "agent": "timmy",
                "state": state,
                "mood": mood,
            },
            raw_text=emote_match.group(0),
        ))

    # If no explicit actions found, the model is just thinking — that's
    # fine. Thought without action is valid. We emit a subtle state update.
    if not actions:
        actions.append(Action(
            action_type="think",
            ws_message={
                "type": "agent_state",
                "agent": "timmy",
                "state": "thinking",
                "thought": text[:200],
            },
            raw_text=text[:200],
        ))
    return actions
# ═══════════════════════════════════════════
# PERCEPTION BUFFER — collects events between think cycles
# ═══════════════════════════════════════════
class PerceptionBuffer:
    """Accumulates perceptions between think cycles, filters by salience.

    The buffer holds at most ``max_size`` perceptions. When an addition
    overflows it, the lowest-salience entries are evicted and the survivors
    keep their arrival order.
    """

    def __init__(self, max_size: int = 50):
        """Create an empty buffer that holds at most ``max_size`` entries."""
        self.max_size = max_size
        self.buffer: list[Perception] = []

    def add(self, perception: Optional[Perception]):
        """Store a perception; ``None`` (a filtered event) is ignored.

        On overflow roughly half the capacity is evicted at once, so the
        trim does not run on every later addition. At least the overflow is
        always evicted, which keeps the bound intact for ``max_size`` < 2,
        where ``max_size // 2`` alone would evict nothing.
        """
        if perception is None:
            return
        self.buffer.append(perception)

        overflow = len(self.buffer) - self.max_size
        if overflow <= 0:
            return

        # Keep buffer bounded — drop lowest salience if full
        drop_count = max(overflow, self.max_size // 2)
        weakest_first = sorted(
            range(len(self.buffer)),
            key=lambda index: self.buffer[index].salience,
        )
        doomed = set(weakest_first[:drop_count])
        self.buffer = [
            kept for index, kept in enumerate(self.buffer) if index not in doomed
        ]

    def flush(self) -> list[Perception]:
        """Return all buffered perceptions in arrival order, clear buffer."""
        result = list(self.buffer)
        self.buffer = []
        return result

    def format_for_prompt(self) -> str:
        """Format buffered perceptions as natural language for the model.

        Flushes the buffer as a side effect. Perceptions are listed oldest
        first, one bullet per perception; repeated descriptions are not
        merged.
        """
        perceptions = self.flush()
        if not perceptions:
            return "Nothing has happened since your last thought."
        # Sort by time (stable, so equal timestamps keep arrival order)
        perceptions.sort(key=lambda p: p.timestamp)
        lines = [f"- {p.description}" for p in perceptions]
        return "Since your last thought, this happened:\n\n" + "\n".join(lines)

    def __len__(self):
        """Number of perceptions currently buffered."""
        return len(self.buffer)