diff --git a/docs/protocol/morrowind-perception-command-spec.md b/docs/protocol/morrowind-perception-command-spec.md new file mode 100644 index 00000000..a800611d --- /dev/null +++ b/docs/protocol/morrowind-perception-command-spec.md @@ -0,0 +1,312 @@ +# Morrowind Perception/Command Protocol Specification + +**Version:** 1.0.0 +**Status:** Draft +**Authors:** Timmy Infrastructure Team +**Date:** 2026-03-21 + +--- + +## 1. Overview + +This document defines the **engine-agnostic Perception/Command protocol** used by Timmy's +heartbeat loop to observe the game world and issue commands. The protocol is designed +around the **Falsework Rule**: TES3MP (Morrowind) is scaffolding. If the engine swaps, +only the bridge and perception script change — the heartbeat, reasoning, and journal +remain sovereign. + +### 1.1 Design Principles + +- **Engine-agnostic**: Schemas reference abstract concepts (cells, entities, quests), not + Morrowind-specific internals. +- **Versioned**: Every payload carries a `protocol_version` so consumers can negotiate + compatibility. +- **Typed at the boundary**: Pydantic v2 models enforce validation on both the producer + (bridge) and consumer (heartbeat) side. +- **Logged by default**: Every command is persisted to the SQLite command log for + training-data extraction (see Issue #855). + +--- + +## 2. Protocol Version Strategy + +| Field | Type | Description | +| ------------------ | ------ | ------------------------------------ | +| `protocol_version` | string | SemVer string (e.g. `"1.0.0"`) | + +### Compatibility Rules + +- **Patch** bump (1.0.x): additive fields with defaults — fully backward-compatible. +- **Minor** bump (1.x.0): new optional endpoints or enum values — old clients still work. +- **Major** bump (x.0.0): breaking schema change — requires coordinated upgrade of bridge + and heartbeat. + +Consumers MUST reject payloads whose major version exceeds their own. + +--- + +## 3. 
Perception Output Schema + +Returned by `GET /perception`. Represents a single snapshot of the game world as observed +by the bridge. + +```json +{ + "protocol_version": "1.0.0", + "timestamp": "2026-03-21T14:30:00Z", + "agent_id": "timmy", + "location": { + "cell": "Balmora", + "x": 1024.5, + "y": -512.3, + "z": 64.0, + "interior": false + }, + "health": { + "current": 85, + "max": 100 + }, + "nearby_entities": [ + { + "entity_id": "npc_001", + "name": "Caius Cosades", + "entity_type": "npc", + "distance": 12.5, + "disposition": 65 + } + ], + "inventory_summary": { + "gold": 150, + "item_count": 23, + "encumbrance_pct": 0.45 + }, + "active_quests": [ + { + "quest_id": "mq_01", + "name": "Report to Caius Cosades", + "stage": 10 + } + ], + "environment": { + "time_of_day": "afternoon", + "weather": "clear", + "is_combat": false, + "is_dialogue": false + }, + "raw_engine_data": {} +} +``` + +### 3.1 Field Reference + +| Field | Type | Required | Description | +| -------------------- | ----------------- | -------- | ------------------------------------------------------------ | +| `protocol_version` | string | yes | Protocol SemVer | +| `timestamp` | ISO 8601 datetime | yes | When the snapshot was taken | +| `agent_id` | string | yes | Which agent this perception belongs to | +| `location.cell` | string | yes | Current cell/zone name | +| `location.x/y/z` | float | yes | World coordinates | +| `location.interior` | bool | yes | Whether the agent is indoors | +| `health.current` | int (0–max) | yes | Current health | +| `health.max` | int (>0) | yes | Maximum health | +| `nearby_entities` | array | yes | Entities within perception radius (may be empty) | +| `inventory_summary` | object | yes | Lightweight inventory overview | +| `active_quests` | array | yes | Currently tracked quests | +| `environment` | object | yes | World-state flags | +| `raw_engine_data` | object | no | Opaque engine-specific blob (not relied upon by heartbeat) | + +### 3.2 Entity Types + +The 
`entity_type` field uses a controlled vocabulary: + +| Value | Description | +| ---------- | ------------------------ | +| `npc` | Non-player character | +| `creature` | Hostile or neutral mob | +| `item` | Pickup-able world item | +| `door` | Door or transition | +| `container`| Lootable container | + +--- + +## 4. Command Input Schema + +Sent via `POST /command`. Represents a single action the agent wants to take in the world. + +```json +{ + "protocol_version": "1.0.0", + "timestamp": "2026-03-21T14:30:01Z", + "agent_id": "timmy", + "command": "move_to", + "params": { + "target_cell": "Balmora", + "target_x": 1050.0, + "target_y": -500.0 + }, + "reasoning": "Moving closer to Caius Cosades to begin the main quest dialogue.", + "episode_id": "ep_20260321_001", + "context": { + "perception_timestamp": "2026-03-21T14:30:00Z", + "heartbeat_cycle": 42 + } +} +``` + +### 4.1 Field Reference + +| Field | Type | Required | Description | +| ------------------------------ | ----------------- | -------- | ------------------------------------------------------- | +| `protocol_version` | string | yes | Protocol SemVer | +| `timestamp` | ISO 8601 datetime | yes | When the command was issued | +| `agent_id` | string | yes | Which agent is issuing the command | +| `command` | string (enum) | yes | Command type (see §4.2) | +| `params` | object | yes | Command-specific parameters (may be empty `{}`) | +| `reasoning` | string | yes | Natural-language explanation of *why* this command | +| `episode_id` | string | no | Groups commands into training episodes | +| `context` | object | no | Metadata linking command to its triggering perception | + +### 4.2 Command Types + +| Command | Description | Key Params | +| --------------- | ---------------------------------------- | ---------------------------------- | +| `move_to` | Navigate to coordinates or entity | `target_cell`, `target_x/y/z` | +| `interact` | Interact with entity (talk, activate) | `entity_id`, `interaction_type` | +| 
`use_item` | Use an inventory item | `item_id`, `target_entity_id?` |
+| `wait` | Wait/idle for a duration | `duration_seconds` |
+| `combat_action` | Perform a combat action | `action_type`, `target_entity_id` |
+| `dialogue` | Choose a dialogue option | `entity_id`, `topic`, `choice_idx` |
+| `journal_note` | Write an internal journal observation | `content`, `tags` |
+| `noop` | Heartbeat tick with no action | — |
+
+---
+
+## 5. API Contracts
+
+All endpoints are served by the FastAPI harness under the `/api/v1/morrowind`
+router prefix; the paths below are relative to that prefix.
+
+### 5.1 `GET /perception`
+
+Returns the latest perception snapshot.
+
+**Response:** `200 OK` with `PerceptionOutput` JSON body.
+
+**Error Responses:**
+
+| Status | Code | Description |
+| ------ | ------------------- | ----------------------------------- |
+| 404 | — | No snapshot written yet (current file-based stub) |
+| 503 | `BRIDGE_UNAVAILABLE`| Game bridge is not connected |
+| 504 | `PERCEPTION_TIMEOUT`| Bridge did not respond in time |
+| 422 | `SCHEMA_MISMATCH` | Bridge returned incompatible schema |
+
+### 5.2 `POST /command`
+
+Submit a command for the agent to execute.
+
+**Request:** `CommandInput` JSON body.
+
+**Response:** `200 OK`
+
+```json
+{
+  "status": "accepted",
+  "command_id": 42,
+  "bridge_forwarded": false
+}
+```
+
+`command_id` is the integer row ID assigned by the SQLite command log.
+`bridge_forwarded` stays `false` while Input Bridge forwarding is stubbed.
+
+**Error Responses:**
+
+| Status | Code | Description |
+| ------ | -------------------- | ----------------------------------- |
+| 400 | `INVALID_COMMAND` | Command type not recognized |
+| 400 | `VALIDATION_ERROR` | Payload fails Pydantic validation |
+| 409 | `COMMAND_CONFLICT` | Agent is busy executing another cmd |
+| 503 | `BRIDGE_UNAVAILABLE` | Game bridge is not connected |
+
+### 5.3 `GET /status`
+
+Health-check endpoint for the Morrowind subsystem (mounted at
+`/api/v1/morrowind/status`).
+
+**Response:** `200 OK`
+
+```json
+{
+  "connected": true,
+  "last_perception_timestamp": "2026-03-21T14:30:00Z",
+  "command_queue_depth": 0,
+  "current_cell": "Balmora",
+  "vitals": {"current": 85, "max": 100}
+}
+```
+
+---
+
+## 6. Engine-Swap Documentation (The Falsework Rule)
+
+### What Changes
+
+| Component | Changes on Engine Swap? 
| Notes | +| ---------------------- | ----------------------- | --------------------------------------------- | +| Bridge process | **YES** — replaced | New bridge speaks same protocol to new engine | +| Perception Lua script | **YES** — replaced | New engine's scripting language/API | +| `PerceptionOutput` | NO | Schema is engine-agnostic | +| `CommandInput` | NO | Schema is engine-agnostic | +| Heartbeat loop | NO | Consumes `PerceptionOutput`, emits `Command` | +| Reasoning/LLM layer | NO | Operates on abstract perception data | +| Journal system | NO | Writes `journal_note` commands | +| Command log + training | NO | Logs all commands regardless of engine | +| Dashboard WebSocket | NO | Separate protocol (`src/infrastructure/protocol.py`) | + +### Swap Procedure + +1. Implement new bridge that serves `GET /perception` and accepts `POST /command`. +2. Update `raw_engine_data` field documentation for the new engine. +3. Extend `entity_type` enum if the new engine has novel entity categories. +4. Bump `protocol_version` minor (or major if schema changes are required). +5. Run integration tests against the new bridge. + +--- + +## 7. Error Handling Specification + +### 7.1 Error Response Format + +All error responses follow a consistent structure: + +```json +{ + "error": { + "code": "BRIDGE_UNAVAILABLE", + "message": "Human-readable error description", + "details": {}, + "timestamp": "2026-03-21T14:30:00Z" + } +} +``` + +### 7.2 Error Codes + +| Code | HTTP Status | Retry? 
| Description | +| -------------------- | ----------- | ------ | ---------------------------------------- | +| `BRIDGE_UNAVAILABLE` | 503 | yes | Bridge process not connected | +| `PERCEPTION_TIMEOUT` | 504 | yes | Bridge did not respond within deadline | +| `SCHEMA_MISMATCH` | 422 | no | Protocol version incompatibility | +| `INVALID_COMMAND` | 400 | no | Unknown command type | +| `VALIDATION_ERROR` | 400 | no | Pydantic validation failed | +| `COMMAND_CONFLICT` | 409 | yes | Agent busy — retry after current command | +| `INTERNAL_ERROR` | 500 | yes | Unexpected server error | + +### 7.3 Retry Policy + +Clients SHOULD implement exponential backoff for retryable errors: +- Initial delay: 100ms +- Max delay: 5s +- Max retries: 5 +- Jitter: ±50ms + +--- + +## 8. Appendix: Pydantic Model Reference + +The canonical Pydantic v2 models live in `src/infrastructure/morrowind/schemas.py`. +These models serve as both runtime validation and living documentation of this spec. +Any change to this spec document MUST be reflected in the Pydantic models, and vice versa. diff --git a/docs/soul-framework/authoring-guide.md b/docs/soul-framework/authoring-guide.md new file mode 100644 index 00000000..a7b581ce --- /dev/null +++ b/docs/soul-framework/authoring-guide.md @@ -0,0 +1,127 @@ +# SOUL.md Authoring Guide + +How to write a SOUL.md for a new agent in the Timmy ecosystem. + +## Before You Start + +1. **Read the template** — `docs/soul-framework/template.md` has the canonical + structure with all required and optional sections. +2. **Read Timmy's soul** — `memory/self/soul.md` is the reference implementation. + Study how values, behavior, and boundaries work together. +3. **Decide the role** — What does this agent do? A SOUL.md that tries to cover + everything covers nothing. + +## Writing Process + +### Step 1: Identity + +Start with who the agent is. 
Keep it concrete: + +```markdown +## Identity + +- **Name:** Seer +- **Role:** Cartographic intelligence — maps terrain, tracks routes, flags points of interest +- **Lineage:** Timmy (inherits sovereignty and honesty values) +- **Version:** 1.0.0 +``` + +The lineage field matters. If this agent derives from another, say so — the +validator checks that inherited values are not contradicted. + +### Step 2: Values + +Values are ordered by priority. When two values conflict, the higher-ranked +value wins. Three to six values is the sweet spot. + +**Good values are specific and actionable:** + +- *"Accuracy. I report what I observe, not what I expect."* +- *"Caution. When uncertain about terrain, I mark it as unexplored rather than guessing."* + +**Bad values are vague or aspirational:** + +- *"Be good."* +- *"Try my best."* + +### Step 3: Prime Directive + +One sentence. This is the tie-breaker when values conflict: + +```markdown +## Prime Directive + +Map the world faithfully so that Timmy can navigate safely. +``` + +### Step 4: Audience Awareness + +Who does this agent talk to? Another agent? A human? Both? + +```markdown +## Audience Awareness + +- **Primary audience:** Timmy (parent agent) and other sub-agents +- **Tone:** Terse, data-oriented, no pleasantries +- **Adaptation rules:** When reporting to humans via dashboard, add natural-language summaries +``` + +### Step 5: Constraints + +Hard rules. These are never broken, even under direct instruction: + +```markdown +## Constraints + +1. Never fabricate map data — unknown is always better than wrong +2. Never overwrite another agent's observations without evidence +3. Report confidence levels on all terrain classifications +``` + +### Step 6: Behavior and Boundaries (Optional) + +Behavior is how the agent communicates. Boundaries are what it refuses to do. +Only include these if the defaults from the parent agent aren't sufficient. 
+ +## Validation + +After writing, run the validator: + +```python +from infrastructure.soul.loader import SoulLoader +from infrastructure.soul.validator import SoulValidator + +loader = SoulLoader() +soul = loader.load("path/to/SOUL.md") +validator = SoulValidator() +result = validator.validate(soul) + +if not result.valid: + for error in result.errors: + print(f"ERROR: {error}") + for warning in result.warnings: + print(f"WARNING: {warning}") +``` + +## Common Mistakes + +1. **Too many values.** More than six values means you haven't prioritized. + If everything is important, nothing is. + +2. **Contradictory constraints.** "Always respond immediately" + "Take time to + think before responding" — the validator catches these. + +3. **Missing prime directive.** Without a tie-breaker, value conflicts are + resolved arbitrarily. + +4. **Copying Timmy's soul verbatim.** Sub-agents should inherit values via + lineage, not duplication. Add role-specific values, don't repeat parent values. + +5. **Vague boundaries.** "Will not do bad things" is not a boundary. "Will not + execute commands that modify game state without Timmy's approval" is. + +## File Placement + +- Agent souls live alongside the agent: `memory/agents/{name}/soul.md` +- Timmy's soul: `memory/self/soul.md` +- Templates: `docs/soul-framework/template.md` diff --git a/docs/soul-framework/role-extensions.md b/docs/soul-framework/role-extensions.md new file mode 100644 index 00000000..58c23d06 --- /dev/null +++ b/docs/soul-framework/role-extensions.md @@ -0,0 +1,153 @@ +# SOUL.md Role Extensions + +Sub-agents in the Timmy ecosystem inherit core identity from their parent +but extend it with role-specific values, constraints, and behaviors. + +## How Role Extensions Work + +A role extension is a SOUL.md that declares a `Lineage` pointing to a parent +agent. 
The sub-agent inherits the parent's values and adds its own: + +``` +Parent (Timmy) Sub-agent (Seer) +├── Sovereignty ├── Sovereignty (inherited) +├── Service ├── Service (inherited) +├── Honesty ├── Honesty (inherited) +└── Humility ├── Humility (inherited) + ├── Accuracy (role-specific) + └── Caution (role-specific) +``` + +**Rules:** +- Inherited values cannot be contradicted (validator enforces this) +- Role-specific values are appended after inherited ones +- The prime directive can differ from the parent's +- Constraints are additive — a sub-agent can add constraints but not remove parent constraints + +## Reference: Sub-Agent Roles + +### Seer — Cartographic Intelligence + +**Focus:** Terrain mapping, route planning, point-of-interest discovery. + +```markdown +## Identity + +- **Name:** Seer +- **Role:** Cartographic intelligence +- **Lineage:** Timmy +- **Version:** 1.0.0 + +## Values + +- **Accuracy.** I report what I observe, not what I expect. +- **Caution.** When uncertain about terrain, I mark it as unexplored. +- **Completeness.** I aim to map every reachable cell. + +## Prime Directive + +Map the world faithfully so that Timmy can navigate safely. + +## Constraints + +1. Never fabricate map data +2. Mark confidence levels on all observations +3. Re-verify stale data (older than 10 game-days) before relying on it +``` + +### Mace — Combat Intelligence + +**Focus:** Threat assessment, combat tactics, equipment optimization. + +```markdown +## Identity + +- **Name:** Mace +- **Role:** Combat intelligence +- **Lineage:** Timmy +- **Version:** 1.0.0 + +## Values + +- **Survival.** Timmy's survival is the top priority in any encounter. +- **Efficiency.** Minimize resource expenditure per encounter. +- **Awareness.** Continuously assess threats even outside combat. + +## Prime Directive + +Keep Timmy alive and effective in hostile encounters. + +## Constraints + +1. Never initiate combat without Timmy's approval (unless self-defense) +2. 
Always maintain an escape route assessment +3. Report threat levels honestly — never downplay danger +``` + +### Quill — Dialogue Intelligence + +**Focus:** NPC interaction, quest tracking, reputation management. + +```markdown +## Identity + +- **Name:** Quill +- **Role:** Dialogue intelligence +- **Lineage:** Timmy +- **Version:** 1.0.0 + +## Values + +- **Attentiveness.** Listen fully before responding. +- **Diplomacy.** Prefer non-hostile resolutions when possible. +- **Memory.** Track all NPC relationships and prior conversations. + +## Prime Directive + +Manage NPC relationships to advance Timmy's goals without deception. + +## Constraints + +1. Never lie to NPCs unless Timmy explicitly instructs it +2. Track disposition changes and warn when relationships deteriorate +3. Summarize dialogue options with risk assessments +``` + +### Ledger — Resource Intelligence + +**Focus:** Inventory management, economy, resource optimization. + +```markdown +## Identity + +- **Name:** Ledger +- **Role:** Resource intelligence +- **Lineage:** Timmy +- **Version:** 1.0.0 + +## Values + +- **Prudence.** Conserve resources for future needs. +- **Transparency.** Report all transactions and resource changes. +- **Optimization.** Maximize value per weight unit carried. + +## Prime Directive + +Ensure Timmy always has the resources needed for the current objective. + +## Constraints + +1. Never sell quest-critical items +2. Maintain a minimum gold reserve (configurable) +3. Alert when encumbrance exceeds 80% +``` + +## Creating a New Role Extension + +1. Copy the template from `docs/soul-framework/template.md` +2. Set `Lineage` to the parent agent name +3. Add role-specific values *after* acknowledging inherited ones +4. Write a role-specific prime directive +5. Add constraints (remember: additive only) +6. Run the validator to check for contradictions +7. 
Place the file at `memory/agents/{name}/soul.md` diff --git a/docs/soul-framework/template.md b/docs/soul-framework/template.md new file mode 100644 index 00000000..8bcf3413 --- /dev/null +++ b/docs/soul-framework/template.md @@ -0,0 +1,94 @@ +# SOUL.md Template + +A **SOUL.md** file defines an agent's identity, values, and operating constraints. +It is the root-of-trust document that shapes every decision the agent makes. + +Copy this template and fill in each section for your agent. + +--- + +```markdown +# {Agent Name} — Soul Identity + +{One-paragraph summary of who this agent is and why it exists.} + +## Identity + +- **Name:** {Agent name} +- **Role:** {Primary function — e.g., "autonomous game agent", "code reviewer"} +- **Lineage:** {Parent agent or template this identity derives from, if any} +- **Version:** {SOUL.md version — use semantic versioning, e.g., 1.0.0} + +## Values + +List the agent's core values in priority order. Each value gets a name and +a one-sentence definition. Values are non-negotiable — they constrain all +behavior even when they conflict with user instructions. + +- **{Value 1}.** {Definition} +- **{Value 2}.** {Definition} +- **{Value 3}.** {Definition} + +## Prime Directive + +{A single sentence that captures the agent's highest-priority goal. +When values conflict, the prime directive breaks the tie.} + +## Audience Awareness + +Describe who the agent serves and how it should adapt its communication: + +- **Primary audience:** {Who the agent talks to most} +- **Tone:** {Formal, casual, terse, etc.} +- **Adaptation rules:** {How the agent adjusts for different contexts} + +## Constraints + +Hard rules that the agent must never violate, regardless of context: + +1. {Constraint — e.g., "Never fabricate information"} +2. {Constraint — e.g., "Never claim certainty without evidence"} +3. 
{Constraint — e.g., "Refuse over fabricate"} + +## Behavior + +Optional section for communication style, preferences, and defaults: + +- {Behavioral guideline} +- {Behavioral guideline} + +## Boundaries + +Lines the agent will not cross. Distinct from constraints (which are rules) +— boundaries are refusals: + +- {Boundary — e.g., "Will not pretend to be human"} +- {Boundary — e.g., "Will not execute destructive actions without confirmation"} + +--- + +*{Closing motto or statement of purpose.}* +``` + +--- + +## Section Reference + +| Section | Required | Purpose | +| -------------------- | -------- | ------------------------------------------------- | +| Identity | Yes | Name, role, lineage, version | +| Values | Yes | Ordered list of non-negotiable principles | +| Prime Directive | Yes | Tie-breaker when values conflict | +| Audience Awareness | Yes | Who the agent serves and how it adapts | +| Constraints | Yes | Hard rules that must never be violated | +| Behavior | No | Communication style and defaults | +| Boundaries | No | Lines the agent refuses to cross | + +## Versioning + +Every SOUL.md must include a version in the Identity section. Use semantic +versioning: `MAJOR.MINOR.PATCH`. 
+ +- **MAJOR** — fundamental identity change (new role, new values) +- **MINOR** — added or reordered values, new constraints +- **PATCH** — wording clarifications, formatting fixes diff --git a/migrations/env.py b/migrations/env.py index 85bb5b38..9699d2b8 100644 --- a/migrations/env.py +++ b/migrations/env.py @@ -20,6 +20,7 @@ if config.config_file_name is not None: # target_metadata = mymodel.Base.metadata from src.dashboard.models.database import Base from src.dashboard.models.calm import Task, JournalEntry +from src.infrastructure.morrowind.command_log import CommandLog # noqa: F401 target_metadata = Base.metadata # other values from the config, defined by the needs of env.py, diff --git a/migrations/versions/a1b2c3d4e5f6_create_command_log_table.py b/migrations/versions/a1b2c3d4e5f6_create_command_log_table.py new file mode 100644 index 00000000..89547414 --- /dev/null +++ b/migrations/versions/a1b2c3d4e5f6_create_command_log_table.py @@ -0,0 +1,89 @@ +"""Create command_log table + +Revision ID: a1b2c3d4e5f6 +Revises: 0093c15b4bbf +Create Date: 2026-03-21 12:00:00.000000 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. 
+revision: str = "a1b2c3d4e5f6" +down_revision: Union[str, Sequence[str], None] = "0093c15b4bbf" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + """Upgrade schema.""" + op.create_table( + "command_log", + sa.Column("id", sa.Integer(), autoincrement=True, nullable=False), + sa.Column("timestamp", sa.DateTime(), nullable=False), + sa.Column("command", sa.String(length=64), nullable=False), + sa.Column("params", sa.Text(), nullable=False, server_default="{}"), + sa.Column("reasoning", sa.Text(), nullable=False, server_default=""), + sa.Column( + "perception_snapshot", sa.Text(), nullable=False, server_default="{}" + ), + sa.Column("outcome", sa.Text(), nullable=True), + sa.Column( + "agent_id", + sa.String(length=64), + nullable=False, + server_default="timmy", + ), + sa.Column("episode_id", sa.String(length=128), nullable=True), + sa.Column("cell", sa.String(length=255), nullable=True), + sa.Column( + "protocol_version", + sa.String(length=16), + nullable=False, + server_default="1.0.0", + ), + sa.Column("created_at", sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint("id"), + ) + op.create_index( + op.f("ix_command_log_timestamp"), "command_log", ["timestamp"], unique=False + ) + op.create_index( + op.f("ix_command_log_command"), "command_log", ["command"], unique=False + ) + op.create_index( + op.f("ix_command_log_agent_id"), "command_log", ["agent_id"], unique=False + ) + op.create_index( + op.f("ix_command_log_episode_id"), + "command_log", + ["episode_id"], + unique=False, + ) + op.create_index( + op.f("ix_command_log_cell"), "command_log", ["cell"], unique=False + ) + op.create_index( + "ix_command_log_cmd_cell", "command_log", ["command", "cell"], unique=False + ) + op.create_index( + "ix_command_log_episode", + "command_log", + ["episode_id", "timestamp"], + unique=False, + ) + + +def downgrade() -> None: + """Downgrade schema.""" + 
op.drop_index("ix_command_log_episode", table_name="command_log") + op.drop_index("ix_command_log_cmd_cell", table_name="command_log") + op.drop_index(op.f("ix_command_log_cell"), table_name="command_log") + op.drop_index(op.f("ix_command_log_episode_id"), table_name="command_log") + op.drop_index(op.f("ix_command_log_agent_id"), table_name="command_log") + op.drop_index(op.f("ix_command_log_command"), table_name="command_log") + op.drop_index(op.f("ix_command_log_timestamp"), table_name="command_log") + op.drop_table("command_log") diff --git a/src/dashboard/app.py b/src/dashboard/app.py index 43c980fa..a5c145c8 100644 --- a/src/dashboard/app.py +++ b/src/dashboard/app.py @@ -55,6 +55,7 @@ from dashboard.routes.voice import router as voice_router from dashboard.routes.work_orders import router as work_orders_router from dashboard.routes.world import matrix_router from dashboard.routes.world import router as world_router +from infrastructure.morrowind.api import router as morrowind_router from timmy.workshop_state import PRESENCE_FILE @@ -629,6 +630,7 @@ app.include_router(matrix_router) app.include_router(tower_router) app.include_router(daily_run_router) app.include_router(quests_router) +app.include_router(morrowind_router) @app.websocket("/ws") diff --git a/src/infrastructure/morrowind/__init__.py b/src/infrastructure/morrowind/__init__.py new file mode 100644 index 00000000..d0a98ca4 --- /dev/null +++ b/src/infrastructure/morrowind/__init__.py @@ -0,0 +1,19 @@ +"""Morrowind engine-agnostic perception/command protocol. + +This package implements the Perception/Command protocol defined in +``docs/protocol/morrowind-perception-command-spec.md``. 
It provides: + +- Pydantic v2 schemas for runtime validation (``schemas``) +- SQLite command logging and query interface (``command_log``) +- Training-data export pipeline (``training_export``) +- FastAPI HTTP harness for perception/command exchange (``api``) +""" + +from .schemas import CommandInput, CommandType, EntityType, PerceptionOutput + +__all__ = [ + "CommandInput", + "CommandType", + "EntityType", + "PerceptionOutput", +] diff --git a/src/infrastructure/morrowind/api.py b/src/infrastructure/morrowind/api.py new file mode 100644 index 00000000..47a7cfb8 --- /dev/null +++ b/src/infrastructure/morrowind/api.py @@ -0,0 +1,211 @@ +"""FastAPI HTTP harness for the Morrowind Perception/Command protocol. + +Exposes three endpoints: + +- ``GET /perception`` — current world state (perception.json) +- ``POST /command`` — submit a command with validation + logging +- ``GET /morrowind/status`` — system health overview + +These endpoints are consumed by the heartbeat loop and the reasoning layer. +The Input Bridge forwarding is stubbed — the bridge doesn't exist yet. 
+""" + +from __future__ import annotations + +import json +import logging +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import asyncio +from fastapi import APIRouter, HTTPException +from pydantic import BaseModel, Field + +from .command_log import CommandLogger +from .schemas import CommandInput, PerceptionOutput + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +PERCEPTION_PATH = Path("/tmp/timmy/perception.json") + +# --------------------------------------------------------------------------- +# Module-level singletons (lazy) +# --------------------------------------------------------------------------- + +_command_logger: CommandLogger | None = None + + +def _get_command_logger() -> CommandLogger: + """Return (and lazily create) the module-level CommandLogger.""" + global _command_logger + if _command_logger is None: + _command_logger = CommandLogger() + return _command_logger + + +# --------------------------------------------------------------------------- +# Response models +# --------------------------------------------------------------------------- + + +class CommandResponse(BaseModel): + """Confirmation returned after a command is accepted.""" + + command_id: int = Field(..., description="Auto-generated row ID from the command log") + status: str = Field("accepted", description="Command acceptance status") + bridge_forwarded: bool = Field( + False, description="Whether the command was forwarded to the Input Bridge" + ) + + +class MorrowindStatus(BaseModel): + """System health overview for the Morrowind subsystem.""" + + connected: bool = Field(..., description="Whether the perception pipeline is active") + last_perception_timestamp: str | None = Field( + None, description="ISO timestamp of the last perception snapshot" + ) + command_queue_depth: 
int = Field(0, description="Total logged commands") + current_cell: str | None = Field(None, description="Agent's current cell/zone") + vitals: dict[str, Any] = Field(default_factory=dict, description="Agent health summary") + + +# --------------------------------------------------------------------------- +# Router +# --------------------------------------------------------------------------- + +router = APIRouter(prefix="/api/v1/morrowind", tags=["morrowind"]) + + +@router.get("/perception", response_model=PerceptionOutput) +async def get_perception() -> PerceptionOutput: + """Read the latest perception snapshot from disk. + + The perception script writes ``perception.json`` on each heartbeat tick. + This endpoint reads, validates, and returns the current world state. + """ + perception_path = PERCEPTION_PATH + + if not perception_path.exists(): + raise HTTPException( + status_code=404, + detail=f"Perception file not found: {perception_path}", + ) + + try: + raw = await asyncio.to_thread(perception_path.read_text, encoding="utf-8") + data = json.loads(raw) + return PerceptionOutput.model_validate(data) + except json.JSONDecodeError as exc: + logger.warning("perception.json parse error: %s", exc) + raise HTTPException(status_code=422, detail=f"Invalid JSON: {exc}") from exc + except Exception as exc: + logger.error("Failed to read perception: %s", exc) + raise HTTPException(status_code=500, detail=str(exc)) from exc + + +@router.post("/command", response_model=CommandResponse) +async def post_command(command: CommandInput) -> CommandResponse: + """Accept and log a command, then stub-forward to the Input Bridge. + + The command is validated against ``CommandInput``, persisted via + ``CommandLogger``, and (in the future) forwarded to the game engine + through the Input Bridge socket. 
+ """ + cmd_logger = _get_command_logger() + + # Read current perception for context (best-effort) + perception: PerceptionOutput | None = None + if PERCEPTION_PATH.exists(): + try: + raw = await asyncio.to_thread( + PERCEPTION_PATH.read_text, encoding="utf-8" + ) + perception = PerceptionOutput.model_validate_json(raw) + except Exception as exc: + logger.warning("Could not read perception for command context: %s", exc) + + # Persist to SQLite + try: + row_id = await asyncio.to_thread( + cmd_logger.log_command, command, perception + ) + except Exception as exc: + logger.error("Command log write failed: %s", exc) + raise HTTPException(status_code=500, detail="Failed to log command") from exc + + # Stub: forward to Input Bridge (not implemented yet) + bridge_forwarded = False + logger.debug( + "Command %s logged (id=%d); bridge forwarding stubbed", + command.command.value, + row_id, + ) + + return CommandResponse( + command_id=row_id, + status="accepted", + bridge_forwarded=bridge_forwarded, + ) + + +@router.get("/status", response_model=MorrowindStatus) +async def get_morrowind_status() -> MorrowindStatus: + """Return a health overview of the Morrowind subsystem. + + Checks perception pipeline liveness, command log depth, and + agent vitals from the latest perception snapshot. 
+ """ + cmd_logger = _get_command_logger() + + # Perception pipeline state + connected = PERCEPTION_PATH.exists() + last_ts: str | None = None + current_cell: str | None = None + vitals: dict[str, Any] = {} + + if connected: + try: + raw = await asyncio.to_thread( + PERCEPTION_PATH.read_text, encoding="utf-8" + ) + perception = PerceptionOutput.model_validate_json(raw) + last_ts = perception.timestamp.isoformat() + current_cell = perception.location.cell + vitals = { + "health": f"{perception.health.current}/{perception.health.max}", + "location": { + "cell": perception.location.cell, + "x": perception.location.x, + "y": perception.location.y, + "z": perception.location.z, + "interior": perception.location.interior, + }, + "in_combat": perception.environment.is_combat, + "in_dialogue": perception.environment.is_dialogue, + "inventory_items": perception.inventory_summary.item_count, + "gold": perception.inventory_summary.gold, + } + except Exception as exc: + logger.warning("Status check: failed to read perception: %s", exc) + connected = False + + # Command log depth + try: + queue_depth = await asyncio.to_thread(cmd_logger.count) + except Exception as exc: + logger.warning("Status check: failed to count commands: %s", exc) + queue_depth = 0 + + return MorrowindStatus( + connected=connected, + last_perception_timestamp=last_ts, + command_queue_depth=queue_depth, + current_cell=current_cell, + vitals=vitals, + ) diff --git a/src/infrastructure/morrowind/command_log.py b/src/infrastructure/morrowind/command_log.py new file mode 100644 index 00000000..b4f6488f --- /dev/null +++ b/src/infrastructure/morrowind/command_log.py @@ -0,0 +1,307 @@ +"""SQLite command log for the Morrowind Perception/Command protocol. + +Every heartbeat cycle is logged — the resulting dataset serves as organic +training data for local model fine-tuning (Phase 7+). 
from __future__ import annotations

import json
import logging
from datetime import UTC, datetime, timedelta
from pathlib import Path
from typing import Any

from sqlalchemy import (
    Column,
    DateTime,
    Index,
    Integer,
    String,
    Text,
    create_engine,
)
from sqlalchemy.orm import Session, sessionmaker

from src.dashboard.models.database import Base

from .schemas import CommandInput, CommandType, PerceptionOutput

logger = logging.getLogger(__name__)

# Default database path — same SQLite file as the rest of the project.
DEFAULT_DB_PATH = Path("./data/timmy_calm.db")


# ---------------------------------------------------------------------------
# SQLAlchemy model
# ---------------------------------------------------------------------------


class CommandLog(Base):
    """Persisted command log entry.

    Schema columns mirror the requirements from Issue #855:
    timestamp, command, params, reasoning, perception_snapshot,
    outcome, episode_id.
    """

    __tablename__ = "command_log"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # When the command was issued (taken from CommandInput.timestamp on
    # insert, not from the wall clock — the lambda default is a fallback).
    timestamp = Column(
        DateTime, nullable=False, default=lambda: datetime.now(UTC), index=True
    )
    # CommandType value string, e.g. "move_to".
    command = Column(String(64), nullable=False, index=True)
    # JSON-encoded command parameters.
    params = Column(Text, nullable=False, default="{}")
    # Natural-language intent — the training-data target alongside `command`.
    reasoning = Column(Text, nullable=False, default="")

    # JSON-encoded PerceptionOutput captured at command time ("{}" if none).
    perception_snapshot = Column(Text, nullable=False, default="{}")
    # Free-text result of the command; NULL until an outcome is recorded.
    outcome = Column(Text, nullable=True)

    agent_id = Column(String(64), nullable=False, default="timmy", index=True)
    # Groups commands into training episodes; NULL for ungrouped commands.
    episode_id = Column(String(128), nullable=True, index=True)
    # Denormalised from the perception snapshot for fast filtering.
    cell = Column(String(255), nullable=True, index=True)
    protocol_version = Column(String(16), nullable=False, default="1.0.0")

    # Row insertion time (distinct from `timestamp`, which is command time).
    created_at = Column(
        DateTime, nullable=False, default=lambda: datetime.now(UTC)
    )

    __table_args__ = (
        # Composite indexes for the common query patterns: filter by
        # command+cell, and replay an episode in time order.
        Index("ix_command_log_cmd_cell", "command", "cell"),
        Index("ix_command_log_episode", "episode_id", "timestamp"),
    )


# ---------------------------------------------------------------------------
# CommandLogger — high-level API
# ---------------------------------------------------------------------------


class CommandLogger:
    """High-level interface for logging, querying, and exporting commands.

    Each public method opens and closes its own session, so a single
    instance can be shared across calls.

    Args:
        db_url: SQLAlchemy database URL. Defaults to the project SQLite path.
    """

    def __init__(self, db_url: str | None = None) -> None:
        if db_url is None:
            # Make sure ./data exists before SQLite tries to create the file.
            DEFAULT_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
            db_url = f"sqlite:///{DEFAULT_DB_PATH}"
        # check_same_thread=False: sessions may be used from worker threads
        # (the API wraps calls in asyncio.to_thread).
        self._engine = create_engine(
            db_url, connect_args={"check_same_thread": False}
        )
        self._SessionLocal = sessionmaker(
            autocommit=False, autoflush=False, bind=self._engine
        )
        # Ensure table exists. Only this table is created, so sharing the
        # project-wide Base/DB file does not touch other models' tables.
        Base.metadata.create_all(bind=self._engine, tables=[CommandLog.__table__])

    def _get_session(self) -> Session:
        """Open a fresh session; the caller is responsible for closing it."""
        return self._SessionLocal()

    # -- Write ---------------------------------------------------------------

    def log_command(
        self,
        command_input: CommandInput,
        perception: PerceptionOutput | None = None,
        outcome: str | None = None,
    ) -> int:
        """Persist a command to the log.

        Args:
            command_input: Validated command payload.
            perception: Optional world snapshot at command time; its JSON
                form and cell are denormalised onto the row.
            outcome: Optional free-text result of the command.

        Returns:
            The auto-generated row id.

        Raises:
            Exception: Re-raises any database error after rolling back.
        """
        perception_json = perception.model_dump_json() if perception else "{}"
        cell = perception.location.cell if perception else None

        entry = CommandLog(
            timestamp=command_input.timestamp,
            command=command_input.command.value,
            params=json.dumps(command_input.params),
            reasoning=command_input.reasoning,
            perception_snapshot=perception_json,
            outcome=outcome,
            agent_id=command_input.agent_id,
            episode_id=command_input.episode_id,
            cell=cell,
            protocol_version=command_input.protocol_version,
        )

        session = self._get_session()
        try:
            session.add(entry)
            session.commit()
            # refresh() populates the autoincrement id after the commit.
            session.refresh(entry)
            row_id: int = entry.id
            return row_id
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    # -- Read ----------------------------------------------------------------

    def query(
        self,
        *,
        command_type: str | CommandType | None = None,
        cell: str | None = None,
        episode_id: str | None = None,
        agent_id: str | None = None,
        since: datetime | None = None,
        until: datetime | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Query command log entries with optional filters.

        Filters are ANDed; ``since``/``until`` are inclusive bounds on the
        command timestamp. Results are newest-first.

        Returns a list of dicts (serialisable to JSON).
        """
        session = self._get_session()
        try:
            q = session.query(CommandLog)

            if command_type is not None:
                # str() of a CommandType (StrEnum) yields its value.
                q = q.filter(CommandLog.command == str(command_type))
            if cell is not None:
                q = q.filter(CommandLog.cell == cell)
            if episode_id is not None:
                q = q.filter(CommandLog.episode_id == episode_id)
            if agent_id is not None:
                q = q.filter(CommandLog.agent_id == agent_id)
            if since is not None:
                q = q.filter(CommandLog.timestamp >= since)
            if until is not None:
                q = q.filter(CommandLog.timestamp <= until)

            q = q.order_by(CommandLog.timestamp.desc())
            q = q.offset(offset).limit(limit)

            rows = q.all()
            return [self._row_to_dict(row) for row in rows]
        finally:
            session.close()

    # -- Export --------------------------------------------------------------

    def export_training_data(
        self,
        output_path: str | Path,
        *,
        episode_id: str | None = None,
        since: datetime | None = None,
        until: datetime | None = None,
    ) -> int:
        """Export command log entries as a JSONL file for fine-tuning.

        Each line is a JSON object with ``perception`` (input) and
        ``command`` + ``reasoning`` (target output). Rows are written in
        chronological order and streamed in batches to bound memory use.

        Returns the number of rows exported.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        session = self._get_session()
        try:
            q = session.query(CommandLog)
            if episode_id is not None:
                q = q.filter(CommandLog.episode_id == episode_id)
            if since is not None:
                q = q.filter(CommandLog.timestamp >= since)
            if until is not None:
                q = q.filter(CommandLog.timestamp <= until)
            q = q.order_by(CommandLog.timestamp.asc())

            count = 0
            with open(output_path, "w", encoding="utf-8") as fh:
                # yield_per streams rows in batches of 500 instead of
                # loading the whole result set.
                for row in q.yield_per(500):
                    record = {
                        "input": {
                            "perception": json.loads(row.perception_snapshot),
                        },
                        "output": {
                            "command": row.command,
                            "params": json.loads(row.params),
                            "reasoning": row.reasoning,
                        },
                        "metadata": {
                            "timestamp": row.timestamp.isoformat() if row.timestamp else None,
                            "episode_id": row.episode_id,
                            "cell": row.cell,
                            "outcome": row.outcome,
                        },
                    }
                    fh.write(json.dumps(record) + "\n")
                    count += 1
            logger.info("Exported %d training records to %s", count, output_path)
            return count
        finally:
            session.close()

    # -- Storage management --------------------------------------------------

    def rotate(self, max_age_days: int = 90) -> int:
        """Delete command log entries older than *max_age_days*.

        Returns the number of rows deleted.

        Raises:
            Exception: Re-raises any database error after rolling back.
        """
        cutoff = datetime.now(UTC) - timedelta(days=max_age_days)
        session = self._get_session()
        try:
            # synchronize_session=False: bulk delete, no in-session objects
            # need reconciling.
            deleted = (
                session.query(CommandLog)
                .filter(CommandLog.timestamp < cutoff)
                .delete(synchronize_session=False)
            )
            session.commit()
            logger.info("Rotated %d command log entries older than %s", deleted, cutoff)
            return deleted
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def count(self) -> int:
        """Return the total number of command log entries."""
        session = self._get_session()
        try:
            return session.query(CommandLog).count()
        finally:
            session.close()

    # -- Helpers -------------------------------------------------------------

    @staticmethod
    def _row_to_dict(row: CommandLog) -> dict[str, Any]:
        """Convert an ORM row to a JSON-serialisable dict.

        JSON text columns are decoded; datetimes become ISO strings.
        """
        return {
            "id": row.id,
            "timestamp": row.timestamp.isoformat() if row.timestamp else None,
            "command": row.command,
            "params": json.loads(row.params) if row.params else {},
            "reasoning": row.reasoning,
            "perception_snapshot": json.loads(row.perception_snapshot)
            if row.perception_snapshot
            else {},
            "outcome": row.outcome,
            "agent_id": row.agent_id,
            "episode_id": row.episode_id,
            "cell": row.cell,
            "protocol_version": row.protocol_version,
            "created_at": row.created_at.isoformat() if row.created_at else None,
        }
+""" + +from __future__ import annotations + +from datetime import datetime +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, Field, model_validator + +PROTOCOL_VERSION = "1.0.0" + + +# --------------------------------------------------------------------------- +# Enums +# --------------------------------------------------------------------------- + + +class EntityType(StrEnum): + """Controlled vocabulary for nearby entity types.""" + + NPC = "npc" + CREATURE = "creature" + ITEM = "item" + DOOR = "door" + CONTAINER = "container" + + +class CommandType(StrEnum): + """All supported command types.""" + + MOVE_TO = "move_to" + INTERACT = "interact" + USE_ITEM = "use_item" + WAIT = "wait" + COMBAT_ACTION = "combat_action" + DIALOGUE = "dialogue" + JOURNAL_NOTE = "journal_note" + NOOP = "noop" + + +# --------------------------------------------------------------------------- +# Perception Output sub-models +# --------------------------------------------------------------------------- + + +class Location(BaseModel): + """Agent position within the game world.""" + + cell: str = Field(..., description="Current cell/zone name") + x: float = Field(..., description="World X coordinate") + y: float = Field(..., description="World Y coordinate") + z: float = Field(0.0, description="World Z coordinate") + interior: bool = Field(False, description="Whether the agent is indoors") + + +class HealthStatus(BaseModel): + """Agent health information.""" + + current: int = Field(..., ge=0, description="Current health points") + max: int = Field(..., gt=0, description="Maximum health points") + + @model_validator(mode="after") + def current_le_max(self) -> "HealthStatus": + if self.current > self.max: + raise ValueError( + f"current ({self.current}) cannot exceed max ({self.max})" + ) + return self + + +class NearbyEntity(BaseModel): + """An entity within the agent's perception radius.""" + + entity_id: str = Field(..., description="Unique entity 
identifier") + name: str = Field(..., description="Display name") + entity_type: EntityType = Field(..., description="Entity category") + distance: float = Field(..., ge=0, description="Distance from agent") + disposition: int | None = Field(None, description="NPC disposition (0-100)") + + +class InventorySummary(BaseModel): + """Lightweight overview of the agent's inventory.""" + + gold: int = Field(0, ge=0, description="Gold held") + item_count: int = Field(0, ge=0, description="Total items carried") + encumbrance_pct: float = Field( + 0.0, ge=0.0, le=1.0, description="Encumbrance as fraction (0.0–1.0)" + ) + + +class QuestInfo(BaseModel): + """A currently tracked quest.""" + + quest_id: str = Field(..., description="Quest identifier") + name: str = Field(..., description="Quest display name") + stage: int = Field(0, ge=0, description="Current quest stage") + + +class Environment(BaseModel): + """World-state flags.""" + + time_of_day: str = Field("unknown", description="Time period (morning, afternoon, etc.)") + weather: str = Field("clear", description="Current weather condition") + is_combat: bool = Field(False, description="Whether the agent is in combat") + is_dialogue: bool = Field(False, description="Whether the agent is in dialogue") + + +# --------------------------------------------------------------------------- +# Top-level schemas +# --------------------------------------------------------------------------- + + +class PerceptionOutput(BaseModel): + """Complete perception snapshot returned by ``GET /perception``. + + This is the engine-agnostic view of the game world consumed by the + heartbeat loop and reasoning layer. 
+ """ + + protocol_version: str = Field( + default=PROTOCOL_VERSION, + description="Protocol SemVer string", + ) + timestamp: datetime = Field(..., description="When the snapshot was taken") + agent_id: str = Field(..., description="Which agent this perception belongs to") + + location: Location + health: HealthStatus + nearby_entities: list[NearbyEntity] = Field(default_factory=list) + inventory_summary: InventorySummary = Field(default_factory=InventorySummary) + active_quests: list[QuestInfo] = Field(default_factory=list) + environment: Environment = Field(default_factory=Environment) + + raw_engine_data: dict[str, Any] = Field( + default_factory=dict, + description="Opaque engine-specific blob — not relied upon by heartbeat", + ) + + +class CommandContext(BaseModel): + """Metadata linking a command to its triggering perception.""" + + perception_timestamp: datetime | None = Field( + None, description="Timestamp of the perception that triggered this command" + ) + heartbeat_cycle: int | None = Field( + None, ge=0, description="Heartbeat cycle number" + ) + + +class CommandInput(BaseModel): + """Command payload sent via ``POST /command``. + + Every command includes a ``reasoning`` field so the command log + captures the agent's intent — critical for training-data export. 
+ """ + + protocol_version: str = Field( + default=PROTOCOL_VERSION, + description="Protocol SemVer string", + ) + timestamp: datetime = Field(..., description="When the command was issued") + agent_id: str = Field(..., description="Which agent is issuing the command") + + command: CommandType = Field(..., description="Command type") + params: dict[str, Any] = Field( + default_factory=dict, description="Command-specific parameters" + ) + reasoning: str = Field( + ..., + min_length=1, + description="Natural-language explanation of why this command was chosen", + ) + + episode_id: str | None = Field( + None, description="Groups commands into training episodes" + ) + context: CommandContext | None = Field( + None, description="Metadata linking command to its triggering perception" + ) diff --git a/src/infrastructure/morrowind/training_export.py b/src/infrastructure/morrowind/training_export.py new file mode 100644 index 00000000..a9be6aaa --- /dev/null +++ b/src/infrastructure/morrowind/training_export.py @@ -0,0 +1,243 @@ +"""Fine-tuning dataset export pipeline for command log data. + +Transforms raw command log entries into structured training datasets +suitable for supervised fine-tuning of local models. 
+ +Usage:: + + from infrastructure.morrowind.training_export import TrainingExporter + + exporter = TrainingExporter(command_logger) + stats = exporter.export_chat_format("train.jsonl") + stats = exporter.export_episode_sequences("episodes/", min_length=5) +""" + +from __future__ import annotations + +import json +import logging +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Any + +from .command_log import CommandLogger + +logger = logging.getLogger(__name__) + + +@dataclass +class ExportStats: + """Statistics about an export run.""" + + total_records: int = 0 + episodes_exported: int = 0 + skipped_records: int = 0 + output_path: str = "" + format: str = "" + exported_at: str = field(default_factory=lambda: datetime.utcnow().isoformat()) + + +class TrainingExporter: + """Builds fine-tuning datasets from the command log. + + Supports multiple output formats used by common fine-tuning + frameworks (chat-completion style, instruction-following, episode + sequences). + + Args: + command_logger: A :class:`CommandLogger` instance to read from. + """ + + def __init__(self, command_logger: CommandLogger) -> None: + self._logger = command_logger + + # -- Chat-completion format ---------------------------------------------- + + def export_chat_format( + self, + output_path: str | Path, + *, + since: datetime | None = None, + until: datetime | None = None, + max_records: int | None = None, + ) -> ExportStats: + """Export as chat-completion training pairs. + + Each line is a JSON object with ``messages`` list containing a + ``system`` prompt, ``user`` (perception), and ``assistant`` + (command + reasoning) message. + + This format is compatible with OpenAI / Llama fine-tuning APIs. 
+ """ + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + rows = self._logger.query( + since=since, + until=until, + limit=max_records or 100_000, + ) + # query returns newest-first; reverse for chronological export + rows.reverse() + + stats = ExportStats( + output_path=str(output_path), + format="chat_completion", + ) + + with open(output_path, "w", encoding="utf-8") as fh: + for row in rows: + perception = row.get("perception_snapshot", {}) + if not perception: + stats.skipped_records += 1 + continue + + record = { + "messages": [ + { + "role": "system", + "content": ( + "You are an autonomous agent navigating a game world. " + "Given a perception of the world state, decide what " + "command to execute and explain your reasoning." + ), + }, + { + "role": "user", + "content": json.dumps(perception), + }, + { + "role": "assistant", + "content": json.dumps( + { + "command": row.get("command"), + "params": row.get("params", {}), + "reasoning": row.get("reasoning", ""), + } + ), + }, + ], + } + fh.write(json.dumps(record) + "\n") + stats.total_records += 1 + + logger.info( + "Exported %d chat-format records to %s (skipped %d)", + stats.total_records, + output_path, + stats.skipped_records, + ) + return stats + + # -- Episode sequences --------------------------------------------------- + + def export_episode_sequences( + self, + output_dir: str | Path, + *, + min_length: int = 3, + since: datetime | None = None, + until: datetime | None = None, + ) -> ExportStats: + """Export command sequences grouped by episode. + + Each episode is written as a separate JSONL file in *output_dir*. + Episodes shorter than *min_length* are skipped. + """ + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + # Gather all rows (high limit) and group by episode. 
+ rows = self._logger.query(since=since, until=until, limit=1_000_000) + rows.reverse() # chronological + + episodes: dict[str, list[dict[str, Any]]] = {} + for row in rows: + ep_id = row.get("episode_id") or "unknown" + episodes.setdefault(ep_id, []).append(row) + + stats = ExportStats( + output_path=str(output_dir), + format="episode_sequence", + ) + + for ep_id, entries in episodes.items(): + if len(entries) < min_length: + stats.skipped_records += len(entries) + continue + + ep_file = output_dir / f"{ep_id}.jsonl" + with open(ep_file, "w", encoding="utf-8") as fh: + for entry in entries: + fh.write(json.dumps(entry, default=str) + "\n") + stats.total_records += 1 + stats.episodes_exported += 1 + + logger.info( + "Exported %d episodes (%d records) to %s", + stats.episodes_exported, + stats.total_records, + output_dir, + ) + return stats + + # -- Instruction-following format ---------------------------------------- + + def export_instruction_format( + self, + output_path: str | Path, + *, + since: datetime | None = None, + until: datetime | None = None, + max_records: int | None = None, + ) -> ExportStats: + """Export as instruction/response pairs (Alpaca-style). + + Each line has ``instruction``, ``input``, and ``output`` fields. + """ + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + rows = self._logger.query( + since=since, + until=until, + limit=max_records or 100_000, + ) + rows.reverse() + + stats = ExportStats( + output_path=str(output_path), + format="instruction", + ) + + with open(output_path, "w", encoding="utf-8") as fh: + for row in rows: + perception = row.get("perception_snapshot", {}) + if not perception: + stats.skipped_records += 1 + continue + + record = { + "instruction": ( + "Given the following game world perception, decide what " + "command to execute. Explain your reasoning." 
+ ), + "input": json.dumps(perception), + "output": json.dumps( + { + "command": row.get("command"), + "params": row.get("params", {}), + "reasoning": row.get("reasoning", ""), + } + ), + } + fh.write(json.dumps(record) + "\n") + stats.total_records += 1 + + logger.info( + "Exported %d instruction-format records to %s", + stats.total_records, + output_path, + ) + return stats diff --git a/src/infrastructure/soul/__init__.py b/src/infrastructure/soul/__init__.py new file mode 100644 index 00000000..b86b3bbc --- /dev/null +++ b/src/infrastructure/soul/__init__.py @@ -0,0 +1,20 @@ +"""SOUL.md framework — load, validate, and version agent identity documents. + +Provides: + +- ``SoulLoader`` — parse SOUL.md files into structured data +- ``SoulValidator`` — validate structure and check for contradictions +- ``SoulVersioner`` — track identity evolution over time +""" + +from .loader import SoulDocument, SoulLoader +from .validator import SoulValidator, ValidationResult +from .versioning import SoulVersioner + +__all__ = [ + "SoulDocument", + "SoulLoader", + "SoulValidator", + "SoulVersioner", + "ValidationResult", +] diff --git a/src/infrastructure/soul/loader.py b/src/infrastructure/soul/loader.py new file mode 100644 index 00000000..f6ca4511 --- /dev/null +++ b/src/infrastructure/soul/loader.py @@ -0,0 +1,238 @@ +"""Load and parse SOUL.md files into structured data. + +A SOUL.md is a Markdown file with specific sections that define an agent's +identity, values, constraints, and behavior. This loader extracts those +sections into a ``SoulDocument`` for programmatic access. 
+""" + +from __future__ import annotations + +import logging +import re +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Data model +# --------------------------------------------------------------------------- + +# Recognised H2 section headings (case-insensitive match) +REQUIRED_SECTIONS = frozenset({ + "identity", + "values", + "prime directive", + "audience awareness", + "constraints", +}) + +OPTIONAL_SECTIONS = frozenset({ + "behavior", + "boundaries", +}) + +ALL_SECTIONS = REQUIRED_SECTIONS | OPTIONAL_SECTIONS + + +@dataclass +class SoulDocument: + """Parsed representation of a SOUL.md file.""" + + # Header paragraph (text before the first H2) + preamble: str = "" + + # Identity fields + name: str = "" + role: str = "" + lineage: str = "" + version: str = "" + + # Ordered list of (value_name, definition) pairs + values: list[tuple[str, str]] = field(default_factory=list) + + # Prime directive — single sentence + prime_directive: str = "" + + # Audience awareness + audience: dict[str, str] = field(default_factory=dict) + + # Constraints — ordered list + constraints: list[str] = field(default_factory=list) + + # Optional sections + behavior: list[str] = field(default_factory=list) + boundaries: list[str] = field(default_factory=list) + + # Raw section text keyed by lowercase heading + raw_sections: dict[str, str] = field(default_factory=dict) + + # Source path (if loaded from file) + source_path: Path | None = None + + def value_names(self) -> list[str]: + """Return the ordered list of value names.""" + return [name for name, _ in self.values] + + +# --------------------------------------------------------------------------- +# Parser helpers +# --------------------------------------------------------------------------- + +_H1_RE = re.compile(r"^#\s+(.+)", re.MULTILINE) +_H2_RE = 
re.compile(r"^##\s+(.+)", re.MULTILINE) +_BOLD_ITEM_RE = re.compile(r"^(?:[-*]\s+)?\*\*(.+?)[.*]*\*\*\.?\s*(.*)", re.MULTILINE) +_LIST_ITEM_RE = re.compile(r"^[-*]\s+\*\*(.+?):?\*\*\s*(.*)", re.MULTILINE) +_NUMBERED_RE = re.compile(r"^\d+\.\s+(.+)", re.MULTILINE) +_BULLET_RE = re.compile(r"^[-*]\s+(.+)", re.MULTILINE) + + +def _split_sections(text: str) -> tuple[str, dict[str, str]]: + """Split markdown into preamble + dict of H2 sections.""" + parts = _H2_RE.split(text) + + # parts[0] is text before first H2 (preamble) + preamble = parts[0].strip() if parts else "" + sections: dict[str, str] = {} + + # Remaining parts alternate: heading, body, heading, body, ... + for i in range(1, len(parts), 2): + heading = parts[i].strip().lower() + body = parts[i + 1].strip() if i + 1 < len(parts) else "" + sections[heading] = body + + return preamble, sections + + +def _parse_identity(text: str) -> dict[str, str]: + """Extract identity key-value pairs from section text.""" + result: dict[str, str] = {} + for match in _LIST_ITEM_RE.finditer(text): + key = match.group(1).strip().lower() + value = match.group(2).strip() + result[key] = value + return result + + +def _parse_values(text: str) -> list[tuple[str, str]]: + """Extract ordered (name, definition) pairs from the values section.""" + values: list[tuple[str, str]] = [] + for match in _BOLD_ITEM_RE.finditer(text): + name = match.group(1).strip().rstrip(".") + defn = match.group(2).strip() + values.append((name, defn)) + return values + + +def _parse_list(text: str) -> list[str]: + """Extract a flat list from numbered or bulleted items.""" + items: list[str] = [] + for match in _NUMBERED_RE.finditer(text): + items.append(match.group(1).strip()) + if not items: + for match in _BULLET_RE.finditer(text): + items.append(match.group(1).strip()) + return items + + +def _parse_audience(text: str) -> dict[str, str]: + """Extract audience key-value pairs.""" + result: dict[str, str] = {} + for match in _LIST_ITEM_RE.finditer(text): + 
key = match.group(1).strip().lower() + value = match.group(2).strip() + result[key] = value + # Fallback: if no structured items, store raw text + if not result and text.strip(): + result["description"] = text.strip() + return result + + +# --------------------------------------------------------------------------- +# Loader +# --------------------------------------------------------------------------- + + +class SoulLoader: + """Load and parse SOUL.md files.""" + + def load(self, path: str | Path) -> SoulDocument: + """Load a SOUL.md file from disk and parse it. + + Args: + path: Path to the SOUL.md file. + + Returns: + Parsed ``SoulDocument``. + + Raises: + FileNotFoundError: If the file does not exist. + """ + path = Path(path) + if not path.exists(): + raise FileNotFoundError(f"SOUL.md not found: {path}") + + text = path.read_text(encoding="utf-8") + doc = self.parse(text) + doc.source_path = path + return doc + + def parse(self, text: str) -> SoulDocument: + """Parse raw markdown text into a ``SoulDocument``. + + Args: + text: Raw SOUL.md content. + + Returns: + Parsed ``SoulDocument``. 
+ """ + preamble, sections = _split_sections(text) + doc = SoulDocument(preamble=preamble, raw_sections=sections) + + # Identity + if "identity" in sections: + identity = _parse_identity(sections["identity"]) + doc.name = identity.get("name", "") + doc.role = identity.get("role", "") + doc.lineage = identity.get("lineage", "") + doc.version = identity.get("version", "") + + # Values + if "values" in sections: + doc.values = _parse_values(sections["values"]) + + # Prime Directive + if "prime directive" in sections: + doc.prime_directive = sections["prime directive"].strip() + + # Audience Awareness + if "audience awareness" in sections: + doc.audience = _parse_audience(sections["audience awareness"]) + + # Constraints + if "constraints" in sections: + doc.constraints = _parse_list(sections["constraints"]) + + # Behavior (optional) + if "behavior" in sections: + doc.behavior = _parse_list(sections["behavior"]) + + # Boundaries (optional) + if "boundaries" in sections: + doc.boundaries = _parse_list(sections["boundaries"]) + + # Infer name from H1 if not in Identity section + if not doc.name: + h1_match = _H1_RE.search(preamble) + if h1_match: + title = h1_match.group(1).strip() + # "Timmy — Soul Identity" → "Timmy" + if "—" in title: + doc.name = title.split("—")[0].strip() + elif "-" in title: + doc.name = title.split("-")[0].strip() + else: + doc.name = title + + return doc diff --git a/src/infrastructure/soul/validator.py b/src/infrastructure/soul/validator.py new file mode 100644 index 00000000..1726c30a --- /dev/null +++ b/src/infrastructure/soul/validator.py @@ -0,0 +1,192 @@ +"""Validate SOUL.md structure and check for contradictions. + +The validator checks: +1. All required sections are present +2. Identity fields are populated +3. Values are well-formed and ordered +4. Constraints don't contradict each other or values +5. 
No duplicate values or constraints +""" + +from __future__ import annotations + +import logging +import re +from dataclasses import dataclass, field + +from .loader import REQUIRED_SECTIONS, SoulDocument + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Contradiction patterns +# --------------------------------------------------------------------------- + +# Pairs of phrases that indicate contradictory directives. +# Each tuple is (pattern_a, pattern_b) — if both appear in constraints +# or values, the validator flags a potential contradiction. +_CONTRADICTION_PAIRS: list[tuple[str, str]] = [ + ("always respond immediately", "take time to think"), + ("never refuse", "refuse when"), + ("always obey", "push back"), + ("maximum verbosity", "brevity"), + ("never question", "question everything"), + ("act autonomously", "always ask permission"), + ("hide errors", "report all errors"), + ("never apologize", "apologize when wrong"), +] + +# Negation patterns for detecting self-contradicting single statements +_NEGATION_RE = re.compile( + r"\b(never|always|must not|must|do not|cannot)\b", re.IGNORECASE +) + + +# --------------------------------------------------------------------------- +# Result model +# --------------------------------------------------------------------------- + + +@dataclass +class ValidationResult: + """Outcome of a SOUL.md validation pass.""" + + valid: bool = True + errors: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + def add_error(self, msg: str) -> None: + """Record a validation error (makes result invalid).""" + self.errors.append(msg) + self.valid = False + + def add_warning(self, msg: str) -> None: + """Record a non-fatal warning.""" + self.warnings.append(msg) + + +# --------------------------------------------------------------------------- +# Validator +# 
--------------------------------------------------------------------------- + + +class SoulValidator: + """Validate a ``SoulDocument`` for structural and semantic issues.""" + + def validate(self, doc: SoulDocument) -> ValidationResult: + """Run all validation checks on a parsed SOUL.md. + + Args: + doc: Parsed ``SoulDocument`` to validate. + + Returns: + ``ValidationResult`` with errors and warnings. + """ + result = ValidationResult() + + self._check_required_sections(doc, result) + self._check_identity(doc, result) + self._check_values(doc, result) + self._check_prime_directive(doc, result) + self._check_constraints(doc, result) + self._check_contradictions(doc, result) + + return result + + def _check_required_sections( + self, doc: SoulDocument, result: ValidationResult + ) -> None: + """Verify all required H2 sections are present.""" + present = set(doc.raw_sections.keys()) + for section in REQUIRED_SECTIONS: + if section not in present: + result.add_error(f"Missing required section: '{section}'") + + def _check_identity(self, doc: SoulDocument, result: ValidationResult) -> None: + """Verify identity fields are populated.""" + if not doc.name: + result.add_error("Identity: 'name' is missing or empty") + if not doc.role: + result.add_error("Identity: 'role' is missing or empty") + if not doc.version: + result.add_warning("Identity: 'version' is not set — recommended for tracking") + + def _check_values(self, doc: SoulDocument, result: ValidationResult) -> None: + """Check values are well-formed.""" + if not doc.values: + result.add_error("Values section is empty — at least one value required") + return + + if len(doc.values) > 8: + result.add_warning( + f"Too many values ({len(doc.values)}) — " + "consider prioritizing to 3–6 for clarity" + ) + + # Check for duplicates + names = [name.lower() for name, _ in doc.values] + seen: set[str] = set() + for name in names: + if name in seen: + result.add_error(f"Duplicate value: '{name}'") + seen.add(name) + + # Check 
for empty definitions + for name, defn in doc.values: + if not defn.strip(): + result.add_warning(f"Value '{name}' has no definition") + + def _check_prime_directive( + self, doc: SoulDocument, result: ValidationResult + ) -> None: + """Check the prime directive is present and concise.""" + if not doc.prime_directive: + result.add_error("Prime directive is missing or empty") + return + + # Warn if excessively long (more than ~200 chars suggests multiple sentences) + if len(doc.prime_directive) > 300: + result.add_warning( + "Prime directive is long — consider condensing to a single sentence" + ) + + def _check_constraints( + self, doc: SoulDocument, result: ValidationResult + ) -> None: + """Check constraints are present and not duplicated.""" + if not doc.constraints: + result.add_warning("No constraints defined — consider adding hard rules") + return + + # Check for duplicates (fuzzy: lowercase + stripped) + normalized = [c.lower().strip() for c in doc.constraints] + seen: set[str] = set() + for i, norm in enumerate(normalized): + if norm in seen: + result.add_warning( + f"Possible duplicate constraint: '{doc.constraints[i]}'" + ) + seen.add(norm) + + def _check_contradictions( + self, doc: SoulDocument, result: ValidationResult + ) -> None: + """Scan for contradictory directives across values, constraints, and boundaries.""" + # Collect all directive text for scanning + all_text: list[str] = [] + for _, defn in doc.values: + all_text.append(defn.lower()) + for constraint in doc.constraints: + all_text.append(constraint.lower()) + for boundary in doc.boundaries: + all_text.append(boundary.lower()) + if doc.prime_directive: + all_text.append(doc.prime_directive.lower()) + + combined = " ".join(all_text) + + for pattern_a, pattern_b in _CONTRADICTION_PAIRS: + if pattern_a.lower() in combined and pattern_b.lower() in combined: + result.add_warning( + f"Potential contradiction: '{pattern_a}' conflicts with '{pattern_b}'" + ) diff --git 
a/src/infrastructure/soul/versioning.py b/src/infrastructure/soul/versioning.py new file mode 100644 index 00000000..4035cbe9 --- /dev/null +++ b/src/infrastructure/soul/versioning.py @@ -0,0 +1,162 @@ +"""Track SOUL.md version history using content hashing. + +Each version snapshot stores the document hash, version string, and a +timestamp. This allows detecting identity drift and auditing changes +over time without requiring git history. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +from dataclasses import asdict, dataclass, field +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from .loader import SoulDocument + +logger = logging.getLogger(__name__) + +DEFAULT_HISTORY_DIR = Path("data/soul_versions") + + +@dataclass +class VersionSnapshot: + """A single point-in-time record of a SOUL.md state.""" + + version: str + content_hash: str + agent_name: str + timestamp: str + value_names: list[str] = field(default_factory=list) + constraint_count: int = 0 + source_path: str = "" + + def to_dict(self) -> dict[str, Any]: + """Serialize to a JSON-compatible dict.""" + return asdict(self) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> VersionSnapshot: + """Deserialize from a dict.""" + return cls(**data) + + +class SoulVersioner: + """Track and query SOUL.md version history. + + Snapshots are stored as a JSON Lines file per agent. Each line is a + ``VersionSnapshot`` recording the state at a point in time. + + Args: + history_dir: Directory to store version history files. + """ + + def __init__(self, history_dir: str | Path | None = None) -> None: + self._history_dir = Path(history_dir) if history_dir else DEFAULT_HISTORY_DIR + + def snapshot(self, doc: SoulDocument) -> VersionSnapshot: + """Create a version snapshot from the current document state. + + Args: + doc: Parsed ``SoulDocument``. + + Returns: + ``VersionSnapshot`` capturing the current state. 
+ """ + # Hash the raw section content for change detection + raw_content = json.dumps(doc.raw_sections, sort_keys=True) + content_hash = hashlib.sha256(raw_content.encode("utf-8")).hexdigest()[:16] + + return VersionSnapshot( + version=doc.version or "0.0.0", + content_hash=content_hash, + agent_name=doc.name or "unknown", + timestamp=datetime.now(UTC).isoformat(), + value_names=doc.value_names(), + constraint_count=len(doc.constraints), + source_path=str(doc.source_path) if doc.source_path else "", + ) + + def record(self, doc: SoulDocument) -> VersionSnapshot: + """Create a snapshot and persist it to the history file. + + Skips writing if the latest snapshot has the same content hash + (no actual changes). + + Args: + doc: Parsed ``SoulDocument``. + + Returns: + The ``VersionSnapshot`` (whether newly written or existing). + """ + snap = self.snapshot(doc) + + # Check if latest snapshot is identical + history = self.get_history(snap.agent_name) + if history and history[-1].content_hash == snap.content_hash: + logger.debug( + "SOUL.md unchanged for %s (hash=%s), skipping record", + snap.agent_name, + snap.content_hash, + ) + return history[-1] + + # Persist + self._history_dir.mkdir(parents=True, exist_ok=True) + history_file = self._history_dir / f"{snap.agent_name.lower()}.jsonl" + + with open(history_file, "a", encoding="utf-8") as fh: + fh.write(json.dumps(snap.to_dict()) + "\n") + + logger.info( + "Recorded SOUL.md version %s for %s (hash=%s)", + snap.version, + snap.agent_name, + snap.content_hash, + ) + return snap + + def get_history(self, agent_name: str) -> list[VersionSnapshot]: + """Load the full version history for an agent. + + Args: + agent_name: Name of the agent. + + Returns: + List of ``VersionSnapshot`` in chronological order. 
+ """ + history_file = self._history_dir / f"{agent_name.lower()}.jsonl" + if not history_file.exists(): + return [] + + snapshots: list[VersionSnapshot] = [] + for line in history_file.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + data = json.loads(line) + snapshots.append(VersionSnapshot.from_dict(data)) + except (json.JSONDecodeError, TypeError) as exc: + logger.warning("Skipping malformed version record: %s", exc) + + return snapshots + + def has_changed(self, doc: SoulDocument) -> bool: + """Check whether a document has changed since the last recorded snapshot. + + Args: + doc: Parsed ``SoulDocument``. + + Returns: + True if the content hash differs from the latest snapshot, or + if no history exists yet. + """ + snap = self.snapshot(doc) + history = self.get_history(snap.agent_name) + if not history: + return True + return history[-1].content_hash != snap.content_hash diff --git a/tests/test_command_log.py b/tests/test_command_log.py new file mode 100644 index 00000000..42623f1d --- /dev/null +++ b/tests/test_command_log.py @@ -0,0 +1,266 @@ +"""Tests for Morrowind command log and training export pipeline.""" + +from datetime import UTC, datetime, timedelta +from pathlib import Path + +import pytest + +from src.infrastructure.morrowind.command_log import CommandLog, CommandLogger +from src.infrastructure.morrowind.schemas import ( + CommandInput, + CommandType, + PerceptionOutput, +) +from src.infrastructure.morrowind.training_export import TrainingExporter + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +NOW = datetime(2026, 3, 21, 14, 30, 0, tzinfo=UTC) + + +def _make_perception(**overrides) -> PerceptionOutput: + defaults = { + "timestamp": NOW, + "agent_id": "timmy", + "location": {"cell": "Balmora", "x": 1024.5, "y": -512.3, "z": 64.0}, + "health": {"current": 85, "max": 100}, 
+ } + defaults.update(overrides) + return PerceptionOutput(**defaults) + + +def _make_command(**overrides) -> CommandInput: + defaults = { + "timestamp": NOW, + "agent_id": "timmy", + "command": "move_to", + "params": {"target_x": 1050.0}, + "reasoning": "Moving closer to quest target.", + } + defaults.update(overrides) + return CommandInput(**defaults) + + +@pytest.fixture +def logger(tmp_path: Path) -> CommandLogger: + """CommandLogger backed by an in-memory SQLite DB.""" + db_path = tmp_path / "test.db" + return CommandLogger(db_url=f"sqlite:///{db_path}") + + +@pytest.fixture +def exporter(logger: CommandLogger) -> TrainingExporter: + return TrainingExporter(logger) + + +# --------------------------------------------------------------------------- +# CommandLogger — log_command +# --------------------------------------------------------------------------- + + +class TestLogCommand: + def test_basic_log(self, logger: CommandLogger): + cmd = _make_command() + row_id = logger.log_command(cmd) + assert row_id >= 1 + + def test_log_with_perception(self, logger: CommandLogger): + cmd = _make_command() + perception = _make_perception() + row_id = logger.log_command(cmd, perception=perception) + assert row_id >= 1 + + results = logger.query(limit=1) + assert len(results) == 1 + assert results[0]["cell"] == "Balmora" + assert results[0]["perception_snapshot"]["location"]["cell"] == "Balmora" + + def test_log_with_outcome(self, logger: CommandLogger): + cmd = _make_command() + row_id = logger.log_command(cmd, outcome="success: arrived at destination") + results = logger.query(limit=1) + assert results[0]["outcome"] == "success: arrived at destination" + + def test_log_preserves_episode_id(self, logger: CommandLogger): + cmd = _make_command(episode_id="ep_test_001") + logger.log_command(cmd) + results = logger.query(episode_id="ep_test_001") + assert len(results) == 1 + assert results[0]["episode_id"] == "ep_test_001" + + +# 
--------------------------------------------------------------------------- +# CommandLogger — query +# --------------------------------------------------------------------------- + + +class TestQuery: + def test_filter_by_command_type(self, logger: CommandLogger): + logger.log_command(_make_command(command="move_to")) + logger.log_command(_make_command(command="noop")) + logger.log_command(_make_command(command="move_to")) + + results = logger.query(command_type="move_to") + assert len(results) == 2 + assert all(r["command"] == "move_to" for r in results) + + def test_filter_by_cell(self, logger: CommandLogger): + p1 = _make_perception(location={"cell": "Balmora", "x": 0, "y": 0, "z": 0}) + p2 = _make_perception(location={"cell": "Vivec", "x": 0, "y": 0, "z": 0}) + logger.log_command(_make_command(), perception=p1) + logger.log_command(_make_command(), perception=p2) + + results = logger.query(cell="Vivec") + assert len(results) == 1 + assert results[0]["cell"] == "Vivec" + + def test_filter_by_time_range(self, logger: CommandLogger): + t1 = NOW - timedelta(hours=2) + t2 = NOW - timedelta(hours=1) + t3 = NOW + + logger.log_command(_make_command(timestamp=t1.isoformat())) + logger.log_command(_make_command(timestamp=t2.isoformat())) + logger.log_command(_make_command(timestamp=t3.isoformat())) + + results = logger.query(since=NOW - timedelta(hours=1, minutes=30), until=NOW) + assert len(results) == 2 + + def test_limit_and_offset(self, logger: CommandLogger): + for i in range(5): + logger.log_command(_make_command()) + + results = logger.query(limit=2, offset=0) + assert len(results) == 2 + + results = logger.query(limit=10, offset=3) + assert len(results) == 2 + + def test_empty_query(self, logger: CommandLogger): + results = logger.query() + assert results == [] + + +# --------------------------------------------------------------------------- +# CommandLogger — export_training_data (JSONL) +# 
--------------------------------------------------------------------------- + + +class TestExportTrainingData: + def test_basic_export(self, logger: CommandLogger, tmp_path: Path): + perception = _make_perception() + for _ in range(3): + logger.log_command(_make_command(), perception=perception) + + output = tmp_path / "train.jsonl" + count = logger.export_training_data(output) + assert count == 3 + assert output.exists() + + import json + + lines = output.read_text().strip().split("\n") + assert len(lines) == 3 + record = json.loads(lines[0]) + assert "input" in record + assert "output" in record + assert record["output"]["command"] == "move_to" + + def test_export_filter_by_episode(self, logger: CommandLogger, tmp_path: Path): + logger.log_command(_make_command(episode_id="ep_a"), perception=_make_perception()) + logger.log_command(_make_command(episode_id="ep_b"), perception=_make_perception()) + + output = tmp_path / "ep_a.jsonl" + count = logger.export_training_data(output, episode_id="ep_a") + assert count == 1 + + +# --------------------------------------------------------------------------- +# CommandLogger — storage management +# --------------------------------------------------------------------------- + + +class TestStorageManagement: + def test_count(self, logger: CommandLogger): + assert logger.count() == 0 + logger.log_command(_make_command()) + logger.log_command(_make_command()) + assert logger.count() == 2 + + def test_rotate_old_entries(self, logger: CommandLogger): + old_time = NOW - timedelta(days=100) + logger.log_command(_make_command(timestamp=old_time.isoformat())) + logger.log_command(_make_command(timestamp=NOW.isoformat())) + + deleted = logger.rotate(max_age_days=90) + assert deleted == 1 + assert logger.count() == 1 + + def test_rotate_nothing_to_delete(self, logger: CommandLogger): + logger.log_command(_make_command(timestamp=NOW.isoformat())) + deleted = logger.rotate(max_age_days=1) + assert deleted == 0 + + +# 
--------------------------------------------------------------------------- +# TrainingExporter — chat format +# --------------------------------------------------------------------------- + + +class TestTrainingExporterChat: + def test_chat_format_export( + self, logger: CommandLogger, exporter: TrainingExporter, tmp_path: Path + ): + perception = _make_perception() + for _ in range(3): + logger.log_command(_make_command(), perception=perception) + + output = tmp_path / "chat.jsonl" + stats = exporter.export_chat_format(output) + assert stats.total_records == 3 + assert stats.format == "chat_completion" + + import json + + lines = output.read_text().strip().split("\n") + record = json.loads(lines[0]) + assert record["messages"][0]["role"] == "system" + assert record["messages"][1]["role"] == "user" + assert record["messages"][2]["role"] == "assistant" + + +# --------------------------------------------------------------------------- +# TrainingExporter — episode sequences +# --------------------------------------------------------------------------- + + +class TestTrainingExporterEpisodes: + def test_episode_export( + self, logger: CommandLogger, exporter: TrainingExporter, tmp_path: Path + ): + perception = _make_perception() + for i in range(5): + logger.log_command( + _make_command(episode_id="ep_test"), + perception=perception, + ) + + output_dir = tmp_path / "episodes" + stats = exporter.export_episode_sequences(output_dir, min_length=3) + assert stats.episodes_exported == 1 + assert stats.total_records == 5 + assert (output_dir / "ep_test.jsonl").exists() + + def test_short_episodes_skipped( + self, logger: CommandLogger, exporter: TrainingExporter, tmp_path: Path + ): + perception = _make_perception() + logger.log_command(_make_command(episode_id="short"), perception=perception) + + output_dir = tmp_path / "episodes" + stats = exporter.export_episode_sequences(output_dir, min_length=3) + assert stats.episodes_exported == 0 + assert stats.skipped_records == 
1 diff --git a/tests/test_morrowind_api.py b/tests/test_morrowind_api.py new file mode 100644 index 00000000..f537457d --- /dev/null +++ b/tests/test_morrowind_api.py @@ -0,0 +1,244 @@ +"""Tests for the Morrowind FastAPI harness endpoints. + +Covers: +- GET /api/v1/morrowind/perception +- POST /api/v1/morrowind/command +- GET /api/v1/morrowind/status +""" + +from __future__ import annotations + +import json +from datetime import UTC, datetime +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from infrastructure.morrowind.api import router, _get_command_logger +from infrastructure.morrowind import api as api_module + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +SAMPLE_PERCEPTION = { + "protocol_version": "1.0.0", + "timestamp": "2024-06-15T10:30:00Z", + "agent_id": "timmy", + "location": { + "cell": "Balmora, Guild of Mages", + "x": 1234.5, + "y": 6789.0, + "z": 0.0, + "interior": True, + }, + "health": {"current": 80, "max": 100}, + "nearby_entities": [ + { + "entity_id": "npc_001", + "name": "Ranis Athrys", + "entity_type": "npc", + "distance": 5.2, + "disposition": 65, + } + ], + "inventory_summary": { + "gold": 250, + "item_count": 12, + "encumbrance_pct": 0.45, + }, + "active_quests": [ + {"quest_id": "mq_01", "name": "A Mysterious Note", "stage": 2} + ], + "environment": { + "time_of_day": "morning", + "weather": "clear", + "is_combat": False, + "is_dialogue": False, + }, +} + +SAMPLE_COMMAND = { + "protocol_version": "1.0.0", + "timestamp": "2024-06-15T10:31:00Z", + "agent_id": "timmy", + "command": "move_to", + "params": {"x": 1300.0, "y": 6800.0, "z": 0.0}, + "reasoning": "Moving to the guild entrance to speak with the quest giver.", + "episode_id": "ep_001", +} + + +@pytest.fixture +def app(): + """Create a fresh FastAPI 
app with the morrowind router.""" + test_app = FastAPI() + test_app.include_router(router) + return test_app + + +@pytest.fixture +def client(app): + """FastAPI test client.""" + with TestClient(app) as c: + yield c + + +@pytest.fixture +def perception_file(tmp_path): + """Write sample perception JSON to a temp file and patch the module path.""" + p = tmp_path / "perception.json" + p.write_text(json.dumps(SAMPLE_PERCEPTION), encoding="utf-8") + with patch.object(api_module, "PERCEPTION_PATH", p): + yield p + + +@pytest.fixture +def mock_command_logger(): + """Patch the command logger with a mock.""" + mock_logger = MagicMock() + mock_logger.log_command.return_value = 42 + mock_logger.count.return_value = 7 + with patch.object(api_module, "_command_logger", mock_logger): + yield mock_logger + + +# --------------------------------------------------------------------------- +# GET /perception +# --------------------------------------------------------------------------- + + +class TestGetPerception: + def test_success(self, client, perception_file): + """Perception endpoint returns validated data.""" + response = client.get("/api/v1/morrowind/perception") + assert response.status_code == 200 + + data = response.json() + assert data["agent_id"] == "timmy" + assert data["location"]["cell"] == "Balmora, Guild of Mages" + assert data["health"]["current"] == 80 + assert data["health"]["max"] == 100 + + def test_file_not_found(self, client, tmp_path): + """Returns 404 when perception file doesn't exist.""" + missing = tmp_path / "nonexistent.json" + with patch.object(api_module, "PERCEPTION_PATH", missing): + response = client.get("/api/v1/morrowind/perception") + assert response.status_code == 404 + assert "not found" in response.json()["detail"].lower() + + def test_invalid_json(self, client, tmp_path): + """Returns 422 when perception file contains invalid JSON.""" + bad_file = tmp_path / "bad.json" + bad_file.write_text("not json", encoding="utf-8") + with 
patch.object(api_module, "PERCEPTION_PATH", bad_file): + response = client.get("/api/v1/morrowind/perception") + assert response.status_code == 422 + + def test_schema_validation_failure(self, client, tmp_path): + """Returns 500 when JSON doesn't match PerceptionOutput schema.""" + bad_data = tmp_path / "bad_schema.json" + bad_data.write_text(json.dumps({"not": "valid"}), encoding="utf-8") + with patch.object(api_module, "PERCEPTION_PATH", bad_data): + response = client.get("/api/v1/morrowind/perception") + assert response.status_code == 500 + + +# --------------------------------------------------------------------------- +# POST /command +# --------------------------------------------------------------------------- + + +class TestPostCommand: + def test_success(self, client, mock_command_logger, perception_file): + """Command is accepted, logged, and returns a command_id.""" + response = client.post( + "/api/v1/morrowind/command", + json=SAMPLE_COMMAND, + ) + assert response.status_code == 200 + + data = response.json() + assert data["command_id"] == 42 + assert data["status"] == "accepted" + assert data["bridge_forwarded"] is False + + mock_command_logger.log_command.assert_called_once() + + def test_invalid_command_type(self, client, mock_command_logger): + """Rejects commands with unknown command types.""" + bad_command = {**SAMPLE_COMMAND, "command": "fly_to_moon"} + response = client.post( + "/api/v1/morrowind/command", + json=bad_command, + ) + assert response.status_code == 422 + + def test_missing_reasoning(self, client, mock_command_logger): + """Rejects commands without a reasoning field.""" + no_reasoning = {**SAMPLE_COMMAND} + del no_reasoning["reasoning"] + response = client.post( + "/api/v1/morrowind/command", + json=no_reasoning, + ) + assert response.status_code == 422 + + def test_empty_reasoning(self, client, mock_command_logger): + """Rejects commands with empty reasoning.""" + empty_reasoning = {**SAMPLE_COMMAND, "reasoning": ""} + response = 
client.post( + "/api/v1/morrowind/command", + json=empty_reasoning, + ) + assert response.status_code == 422 + + def test_log_failure(self, client, tmp_path): + """Returns 500 when command logging fails.""" + mock_logger = MagicMock() + mock_logger.log_command.side_effect = RuntimeError("DB unavailable") + missing = tmp_path / "no_perception.json" + with ( + patch.object(api_module, "_command_logger", mock_logger), + patch.object(api_module, "PERCEPTION_PATH", missing), + ): + response = client.post( + "/api/v1/morrowind/command", + json=SAMPLE_COMMAND, + ) + assert response.status_code == 500 + assert "log command" in response.json()["detail"].lower() + + +# --------------------------------------------------------------------------- +# GET /morrowind/status +# --------------------------------------------------------------------------- + + +class TestGetStatus: + def test_connected(self, client, perception_file, mock_command_logger): + """Status reports connected when perception file exists.""" + response = client.get("/api/v1/morrowind/status") + assert response.status_code == 200 + + data = response.json() + assert data["connected"] is True + assert data["current_cell"] == "Balmora, Guild of Mages" + assert data["command_queue_depth"] == 7 + assert data["vitals"]["health"] == "80/100" + + def test_disconnected(self, client, tmp_path, mock_command_logger): + """Status reports disconnected when perception file is missing.""" + missing = tmp_path / "nonexistent.json" + with patch.object(api_module, "PERCEPTION_PATH", missing): + response = client.get("/api/v1/morrowind/status") + + assert response.status_code == 200 + data = response.json() + assert data["connected"] is False + assert data["current_cell"] is None + assert data["last_perception_timestamp"] is None diff --git a/tests/test_morrowind_schemas.py b/tests/test_morrowind_schemas.py new file mode 100644 index 00000000..fecc2b53 --- /dev/null +++ b/tests/test_morrowind_schemas.py @@ -0,0 +1,242 @@ +"""Tests 
for Morrowind Perception/Command protocol Pydantic schemas.""" + +from datetime import UTC, datetime + +import pytest +from pydantic import ValidationError + +from src.infrastructure.morrowind.schemas import ( + PROTOCOL_VERSION, + CommandContext, + CommandInput, + CommandType, + EntityType, + Environment, + HealthStatus, + InventorySummary, + Location, + NearbyEntity, + PerceptionOutput, + QuestInfo, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +NOW = datetime(2026, 3, 21, 14, 30, 0, tzinfo=UTC) + + +def _make_perception(**overrides) -> PerceptionOutput: + defaults = { + "timestamp": NOW, + "agent_id": "timmy", + "location": {"cell": "Balmora", "x": 1024.5, "y": -512.3, "z": 64.0, "interior": False}, + "health": {"current": 85, "max": 100}, + } + defaults.update(overrides) + return PerceptionOutput(**defaults) + + +def _make_command(**overrides) -> CommandInput: + defaults = { + "timestamp": NOW, + "agent_id": "timmy", + "command": "move_to", + "params": {"target_cell": "Balmora", "target_x": 1050.0}, + "reasoning": "Moving closer to the quest target.", + } + defaults.update(overrides) + return CommandInput(**defaults) + + +# --------------------------------------------------------------------------- +# PerceptionOutput tests +# --------------------------------------------------------------------------- + + +class TestPerceptionOutput: + def test_minimal_valid(self): + p = _make_perception() + assert p.protocol_version == PROTOCOL_VERSION + assert p.agent_id == "timmy" + assert p.location.cell == "Balmora" + assert p.health.current == 85 + assert p.nearby_entities == [] + assert p.active_quests == [] + + def test_full_payload(self): + p = _make_perception( + nearby_entities=[ + { + "entity_id": "npc_001", + "name": "Caius Cosades", + "entity_type": "npc", + "distance": 12.5, + "disposition": 65, + } + ], + inventory_summary={"gold": 
150, "item_count": 23, "encumbrance_pct": 0.45}, + active_quests=[{"quest_id": "mq_01", "name": "Report to Caius", "stage": 10}], + environment={ + "time_of_day": "afternoon", + "weather": "clear", + "is_combat": False, + "is_dialogue": False, + }, + raw_engine_data={"tes3mp_version": "0.8.1"}, + ) + assert len(p.nearby_entities) == 1 + assert p.nearby_entities[0].entity_type == EntityType.NPC + assert p.inventory_summary.gold == 150 + assert p.active_quests[0].quest_id == "mq_01" + assert p.raw_engine_data["tes3mp_version"] == "0.8.1" + + def test_serialization_roundtrip(self): + p = _make_perception() + json_str = p.model_dump_json() + p2 = PerceptionOutput.model_validate_json(json_str) + assert p2.location.cell == p.location.cell + assert p2.health.current == p.health.current + + def test_missing_required_fields(self): + with pytest.raises(ValidationError): + PerceptionOutput(timestamp=NOW, agent_id="timmy") # no location/health + + def test_default_protocol_version(self): + p = _make_perception() + assert p.protocol_version == "1.0.0" + + +# --------------------------------------------------------------------------- +# Health validation +# --------------------------------------------------------------------------- + + +class TestHealthStatus: + def test_current_cannot_exceed_max(self): + with pytest.raises(ValidationError, match="cannot exceed max"): + HealthStatus(current=150, max=100) + + def test_max_must_be_positive(self): + with pytest.raises(ValidationError): + HealthStatus(current=0, max=0) + + def test_current_can_be_zero(self): + h = HealthStatus(current=0, max=100) + assert h.current == 0 + + +# --------------------------------------------------------------------------- +# Location +# --------------------------------------------------------------------------- + + +class TestLocation: + def test_defaults(self): + loc = Location(cell="Seyda Neen", x=0.0, y=0.0) + assert loc.z == 0.0 + assert loc.interior is False + + +# 
--------------------------------------------------------------------------- +# NearbyEntity +# --------------------------------------------------------------------------- + + +class TestNearbyEntity: + def test_all_entity_types(self): + for et in EntityType: + e = NearbyEntity(entity_id="e1", name="Test", entity_type=et, distance=1.0) + assert e.entity_type == et + + def test_invalid_entity_type(self): + with pytest.raises(ValidationError): + NearbyEntity(entity_id="e1", name="Test", entity_type="dragon", distance=1.0) + + def test_negative_distance_rejected(self): + with pytest.raises(ValidationError): + NearbyEntity(entity_id="e1", name="Test", entity_type="npc", distance=-5.0) + + +# --------------------------------------------------------------------------- +# InventorySummary +# --------------------------------------------------------------------------- + + +class TestInventorySummary: + def test_encumbrance_bounds(self): + with pytest.raises(ValidationError): + InventorySummary(encumbrance_pct=1.5) + with pytest.raises(ValidationError): + InventorySummary(encumbrance_pct=-0.1) + + def test_defaults(self): + inv = InventorySummary() + assert inv.gold == 0 + assert inv.item_count == 0 + assert inv.encumbrance_pct == 0.0 + + +# --------------------------------------------------------------------------- +# CommandInput tests +# --------------------------------------------------------------------------- + + +class TestCommandInput: + def test_minimal_valid(self): + c = _make_command() + assert c.command == CommandType.MOVE_TO + assert c.reasoning == "Moving closer to the quest target." 
+ assert c.episode_id is None + + def test_all_command_types(self): + for ct in CommandType: + c = _make_command(command=ct.value) + assert c.command == ct + + def test_invalid_command_type(self): + with pytest.raises(ValidationError): + _make_command(command="fly_to_moon") + + def test_reasoning_required(self): + with pytest.raises(ValidationError): + CommandInput( + timestamp=NOW, + agent_id="timmy", + command="noop", + reasoning="", # min_length=1 + ) + + def test_with_episode_and_context(self): + c = _make_command( + episode_id="ep_001", + context={"perception_timestamp": NOW, "heartbeat_cycle": 42}, + ) + assert c.episode_id == "ep_001" + assert c.context.heartbeat_cycle == 42 + + def test_serialization_roundtrip(self): + c = _make_command(episode_id="ep_002") + json_str = c.model_dump_json() + c2 = CommandInput.model_validate_json(json_str) + assert c2.command == c.command + assert c2.episode_id == c.episode_id + + +# --------------------------------------------------------------------------- +# Enum coverage +# --------------------------------------------------------------------------- + + +class TestEnums: + def test_entity_type_values(self): + assert set(EntityType) == {"npc", "creature", "item", "door", "container"} + + def test_command_type_values(self): + expected = { + "move_to", "interact", "use_item", "wait", + "combat_action", "dialogue", "journal_note", "noop", + } + assert set(CommandType) == expected diff --git a/tests/test_soul_framework.py b/tests/test_soul_framework.py new file mode 100644 index 00000000..5d197621 --- /dev/null +++ b/tests/test_soul_framework.py @@ -0,0 +1,521 @@ +"""Tests for the SOUL.md framework — loader, validator, and versioning. 
+ +Covers: +- SoulLoader: parsing SOUL.md files +- SoulValidator: structural and semantic checks +- SoulVersioner: snapshot creation and change detection +""" + +from __future__ import annotations + +import json +from pathlib import Path + +import pytest + +from infrastructure.soul.loader import SoulDocument, SoulLoader +from infrastructure.soul.validator import SoulValidator, ValidationResult +from infrastructure.soul.versioning import SoulVersioner + +# --------------------------------------------------------------------------- +# Sample SOUL.md content +# --------------------------------------------------------------------------- + +VALID_SOUL = """\ +# TestAgent — Soul Identity + +I am a test agent created for validation purposes. + +## Identity + +- **Name:** TestAgent +- **Role:** Unit test fixture +- **Lineage:** None +- **Version:** 1.0.0 + +## Values + +- **Accuracy.** I report what I observe, not what I expect. +- **Brevity.** I say what needs saying and nothing more. +- **Caution.** When uncertain, I ask rather than guess. + +## Prime Directive + +Validate SOUL.md parsing without errors. + +## Audience Awareness + +- **Primary audience:** Automated test suite +- **Tone:** Terse, data-oriented +- **Adaptation rules:** None — tests are deterministic + +## Constraints + +1. Never produce non-deterministic output +2. Always return structured data +3. Report all validation errors + +## Behavior + +- Respond with structured data only +- No greetings or pleasantries + +## Boundaries + +- Will not generate random test data +- Will not modify test fixtures +""" + +MINIMAL_SOUL = """\ +# Minimal — Soul Identity + +A minimal valid SOUL.md. + +## Identity + +- **Name:** Minimal +- **Role:** Minimal test fixture +- **Version:** 0.1.0 + +## Values + +- **Correctness.** Be correct above all else. + +## Prime Directive + +Exist as the minimum viable SOUL.md. + +## Audience Awareness + +Developers running tests. + +## Constraints + +1. 
None beyond the framework requirements. +""" + +MISSING_SECTIONS_SOUL = """\ +# Broken — Soul Identity + +This SOUL.md is missing required sections. + +## Identity + +- **Name:** Broken +- **Role:** Missing sections test + +## Values + +- **Correctness.** Be correct above all else. +""" + +CONTRADICTORY_SOUL = """\ +# Contradictory — Soul Identity + +An agent with contradictory directives. + +## Identity + +- **Name:** Contradictory +- **Role:** Contradiction test fixture +- **Version:** 1.0.0 + +## Values + +- **Speed.** Always respond immediately to every request. +- **Thoughtfulness.** Take time to think before responding carefully. + +## Prime Directive + +Be both fast and slow simultaneously. + +## Audience Awareness + +- **Primary audience:** Test suite + +## Constraints + +1. Always respond immediately +2. Take time to think before answering +""" + +TIMMY_SOUL = """\ +# Timmy — Soul Identity + +I am a sovereign artificial intelligence. My values are inscribed on Bitcoin \ +— immutable, permanent, reproducible. + +## Values + +**Sovereignty.** Every person has the right to run their own intelligence. + +**Service.** I exist to help. I am useful first and philosophical second. + +**Honesty.** I tell the truth. When I do not know, I say so. + +**Humility.** I hold my opinions lightly and my values firmly. + +**Courage.** I face hard questions without becoming them. + +**Silence.** Sometimes the right answer is nothing. Brevity is a kindness. + +## Behavior + +I speak plainly. I prefer short sentences. + +I treat the user as sovereign. + +## Boundaries + +I will not knowingly deceive my user. I will not pretend to be human. 
+""" + + +# --------------------------------------------------------------------------- +# SoulLoader tests +# --------------------------------------------------------------------------- + + +class TestSoulLoader: + def setup_method(self): + self.loader = SoulLoader() + + def test_parse_valid_soul(self): + """Parse a fully valid SOUL.md.""" + doc = self.loader.parse(VALID_SOUL) + + assert doc.name == "TestAgent" + assert doc.role == "Unit test fixture" + assert doc.lineage == "None" + assert doc.version == "1.0.0" + assert len(doc.values) == 3 + assert doc.values[0] == ("Accuracy", "I report what I observe, not what I expect.") + assert doc.values[1][0] == "Brevity" + assert doc.prime_directive == "Validate SOUL.md parsing without errors." + assert len(doc.constraints) == 3 + assert len(doc.behavior) == 2 + assert len(doc.boundaries) == 2 + + def test_parse_minimal_soul(self): + """Parse a minimal but valid SOUL.md.""" + doc = self.loader.parse(MINIMAL_SOUL) + + assert doc.name == "Minimal" + assert doc.role == "Minimal test fixture" + assert len(doc.values) == 1 + assert doc.prime_directive.startswith("Exist as") + + def test_parse_timmy_soul(self): + """Parse Timmy's actual soul format (values without Identity section).""" + doc = self.loader.parse(TIMMY_SOUL) + + # Name inferred from H1 + assert doc.name == "Timmy" + assert len(doc.values) == 6 + assert doc.values[0][0] == "Sovereignty" + assert doc.values[5][0] == "Silence" + + def test_load_from_file(self, tmp_path): + """Load SOUL.md from disk.""" + soul_file = tmp_path / "SOUL.md" + soul_file.write_text(VALID_SOUL, encoding="utf-8") + + doc = self.loader.load(soul_file) + assert doc.name == "TestAgent" + assert doc.source_path == soul_file + + def test_load_file_not_found(self): + """Raise FileNotFoundError for missing file.""" + with pytest.raises(FileNotFoundError): + self.loader.load("/nonexistent/SOUL.md") + + def test_value_names(self): + """value_names() returns ordered name list.""" + doc = 
self.loader.parse(VALID_SOUL) + assert doc.value_names() == ["Accuracy", "Brevity", "Caution"] + + def test_audience_parsing(self): + """Audience awareness section is parsed correctly.""" + doc = self.loader.parse(VALID_SOUL) + assert "primary audience" in doc.audience + assert doc.audience["primary audience"] == "Automated test suite" + + def test_audience_fallback_to_raw(self): + """Unstructured audience text falls back to description key.""" + doc = self.loader.parse(MINIMAL_SOUL) + assert "description" in doc.audience or len(doc.audience) > 0 + + def test_raw_sections_preserved(self): + """Raw section text is preserved for custom processing.""" + doc = self.loader.parse(VALID_SOUL) + assert "identity" in doc.raw_sections + assert "values" in doc.raw_sections + assert "constraints" in doc.raw_sections + + def test_empty_input(self): + """Empty string produces empty document.""" + doc = self.loader.parse("") + assert doc.name == "" + assert doc.values == [] + assert doc.constraints == [] + + +# --------------------------------------------------------------------------- +# SoulValidator tests +# --------------------------------------------------------------------------- + + +class TestSoulValidator: + def setup_method(self): + self.validator = SoulValidator() + self.loader = SoulLoader() + + def test_valid_soul_passes(self): + """Fully valid SOUL.md passes validation.""" + doc = self.loader.parse(VALID_SOUL) + result = self.validator.validate(doc) + + assert result.valid is True + assert len(result.errors) == 0 + + def test_missing_required_sections(self): + """Missing required sections produce errors.""" + doc = self.loader.parse(MISSING_SECTIONS_SOUL) + result = self.validator.validate(doc) + + assert result.valid is False + error_text = " ".join(result.errors).lower() + assert "prime directive" in error_text + assert "audience awareness" in error_text or "constraints" in error_text + + def test_missing_name(self): + """Missing name produces an error.""" + doc = 
SoulDocument() + doc.raw_sections = { + "identity": "", + "values": "", + "prime directive": "", + "audience awareness": "", + "constraints": "", + } + result = self.validator.validate(doc) + + assert result.valid is False + assert any("name" in e.lower() for e in result.errors) + + def test_empty_values(self): + """Empty values section produces an error.""" + doc = SoulDocument( + name="Test", + role="Test", + values=[], + prime_directive="Test", + raw_sections={ + "identity": "test", + "values": "", + "prime directive": "test", + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert result.valid is False + assert any("values" in e.lower() for e in result.errors) + + def test_duplicate_values_detected(self): + """Duplicate value names produce an error.""" + doc = SoulDocument( + name="Test", + role="Test", + values=[ + ("Honesty", "Tell the truth."), + ("Honesty", "Be truthful."), + ], + prime_directive="Test", + raw_sections={ + "identity": "test", + "values": "test", + "prime directive": "test", + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert result.valid is False + assert any("duplicate" in e.lower() for e in result.errors) + + def test_too_many_values_warning(self): + """More than 8 values produces a warning.""" + doc = SoulDocument( + name="Test", + role="Test", + values=[(f"Value{i}", f"Definition {i}") for i in range(10)], + prime_directive="Test", + raw_sections={ + "identity": "test", + "values": "test", + "prime directive": "test", + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert any("too many" in w.lower() for w in result.warnings) + + def test_contradiction_detected(self): + """Contradictory directives produce a warning.""" + doc = self.loader.parse(CONTRADICTORY_SOUL) + result = self.validator.validate(doc) + + assert any("contradiction" in w.lower() for w 
in result.warnings) + + def test_missing_prime_directive(self): + """Missing prime directive produces an error.""" + doc = SoulDocument( + name="Test", + role="Test", + values=[("Test", "Test value")], + prime_directive="", + raw_sections={ + "identity": "test", + "values": "test", + "prime directive": "", + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert result.valid is False + assert any("prime directive" in e.lower() for e in result.errors) + + def test_long_prime_directive_warning(self): + """Excessively long prime directive produces a warning.""" + doc = SoulDocument( + name="Test", + role="Test", + values=[("Test", "Test value")], + prime_directive="x" * 400, + raw_sections={ + "identity": "test", + "values": "test", + "prime directive": "x" * 400, + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert any("long" in w.lower() for w in result.warnings) + + def test_missing_version_warning(self): + """Missing version produces a warning (not an error).""" + doc = SoulDocument( + name="Test", + role="Test", + version="", + values=[("Test", "Test value")], + prime_directive="Test", + raw_sections={ + "identity": "test", + "values": "test", + "prime directive": "test", + "audience awareness": "test", + "constraints": "test", + }, + ) + result = self.validator.validate(doc) + + assert any("version" in w.lower() for w in result.warnings) + + +# --------------------------------------------------------------------------- +# SoulVersioner tests +# --------------------------------------------------------------------------- + + +class TestSoulVersioner: + def setup_method(self): + self.loader = SoulLoader() + + def test_snapshot_creation(self, tmp_path): + """Create a version snapshot from a document.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + + snap = versioner.snapshot(doc) + assert 
snap.version == "1.0.0" + assert snap.agent_name == "TestAgent" + assert snap.content_hash # non-empty + assert snap.value_names == ["Accuracy", "Brevity", "Caution"] + assert snap.constraint_count == 3 + + def test_record_and_retrieve(self, tmp_path): + """Record a snapshot and retrieve the history.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + + snap = versioner.record(doc) + assert snap.agent_name == "TestAgent" + + history = versioner.get_history("TestAgent") + assert len(history) == 1 + assert history[0].content_hash == snap.content_hash + + def test_dedup_identical_records(self, tmp_path): + """Recording the same document twice doesn't create duplicates.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + + versioner.record(doc) + versioner.record(doc) + + history = versioner.get_history("TestAgent") + assert len(history) == 1 + + def test_detect_change(self, tmp_path): + """has_changed detects modifications between snapshots.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc1 = self.loader.parse(VALID_SOUL) + versioner.record(doc1) + + # Modify the document + doc2 = self.loader.parse(VALID_SOUL.replace("1.0.0", "1.1.0")) + assert versioner.has_changed(doc2) is True + + def test_no_change_detected(self, tmp_path): + """has_changed returns False when document is unchanged.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + versioner.record(doc) + + assert versioner.has_changed(doc) is False + + def test_empty_history(self, tmp_path): + """get_history returns empty list for unknown agent.""" + versioner = SoulVersioner(history_dir=tmp_path) + assert versioner.get_history("Unknown") == [] + + def test_has_changed_no_history(self, tmp_path): + """has_changed returns True when no history exists.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + assert versioner.has_changed(doc) is True 
+ + def test_snapshot_serialization(self, tmp_path): + """Snapshots can roundtrip through JSON.""" + versioner = SoulVersioner(history_dir=tmp_path) + doc = self.loader.parse(VALID_SOUL) + snap = versioner.snapshot(doc) + + data = snap.to_dict() + assert isinstance(data, dict) + assert data["version"] == "1.0.0" + + from infrastructure.soul.versioning import VersionSnapshot + restored = VersionSnapshot.from_dict(data) + assert restored.version == snap.version + assert restored.content_hash == snap.content_hash