forked from Rockachopa/Timmy-time-dashboard
Implement two foundational infrastructure pieces for the Morrowind integration: 1. Perception/Command Protocol (Issue #859): - Formal spec document with JSON schemas, API contracts, versioning strategy - Engine-agnostic design following the Falsework Rule - Pydantic v2 models (PerceptionOutput, CommandInput) for runtime validation 2. SQLite Command Log + Training Pipeline (Issue #855): - SQLAlchemy model for command_log table with full indexing - CommandLogger class with log_command(), query(), export_training_data() - TrainingExporter with chat-completion, episode, and instruction formats - Storage management (rotation/archival) utilities - Alembic migration for the new table Includes 39 passing tests covering schema validation, logging, querying, and export. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
307
src/infrastructure/morrowind/command_log.py
Normal file
307
src/infrastructure/morrowind/command_log.py
Normal file
@@ -0,0 +1,307 @@
|
||||
"""SQLite command log for the Morrowind Perception/Command protocol.
|
||||
|
||||
Every heartbeat cycle is logged — the resulting dataset serves as organic
|
||||
training data for local model fine-tuning (Phase 7+).
|
||||
|
||||
Usage::
|
||||
|
||||
from infrastructure.morrowind.command_log import CommandLogger
|
||||
|
||||
logger = CommandLogger() # uses project default DB
|
||||
logger.log_command(command_input, perception_snapshot)
|
||||
results = logger.query(command_type="move_to", limit=100)
|
||||
logger.export_training_data("export.jsonl")
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
DateTime,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
create_engine,
|
||||
)
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from src.dashboard.models.database import Base
|
||||
|
||||
from .schemas import CommandInput, CommandType, PerceptionOutput
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default database path — same SQLite file as the rest of the project.
|
||||
DEFAULT_DB_PATH = Path("./data/timmy_calm.db")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SQLAlchemy model
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CommandLog(Base):
|
||||
"""Persisted command log entry.
|
||||
|
||||
Schema columns mirror the requirements from Issue #855:
|
||||
timestamp, command, params, reasoning, perception_snapshot,
|
||||
outcome, episode_id.
|
||||
"""
|
||||
|
||||
__tablename__ = "command_log"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
|
||||
timestamp = Column(
|
||||
DateTime, nullable=False, default=lambda: datetime.now(UTC), index=True
|
||||
)
|
||||
command = Column(String(64), nullable=False, index=True)
|
||||
params = Column(Text, nullable=False, default="{}")
|
||||
reasoning = Column(Text, nullable=False, default="")
|
||||
|
||||
perception_snapshot = Column(Text, nullable=False, default="{}")
|
||||
outcome = Column(Text, nullable=True)
|
||||
|
||||
agent_id = Column(String(64), nullable=False, default="timmy", index=True)
|
||||
episode_id = Column(String(128), nullable=True, index=True)
|
||||
cell = Column(String(255), nullable=True, index=True)
|
||||
protocol_version = Column(String(16), nullable=False, default="1.0.0")
|
||||
|
||||
created_at = Column(
|
||||
DateTime, nullable=False, default=lambda: datetime.now(UTC)
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
Index("ix_command_log_cmd_cell", "command", "cell"),
|
||||
Index("ix_command_log_episode", "episode_id", "timestamp"),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CommandLogger — high-level API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CommandLogger:
|
||||
"""High-level interface for logging, querying, and exporting commands.
|
||||
|
||||
Args:
|
||||
db_url: SQLAlchemy database URL. Defaults to the project SQLite path.
|
||||
"""
|
||||
|
||||
def __init__(self, db_url: str | None = None) -> None:
|
||||
if db_url is None:
|
||||
DEFAULT_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
db_url = f"sqlite:///{DEFAULT_DB_PATH}"
|
||||
self._engine = create_engine(
|
||||
db_url, connect_args={"check_same_thread": False}
|
||||
)
|
||||
self._SessionLocal = sessionmaker(
|
||||
autocommit=False, autoflush=False, bind=self._engine
|
||||
)
|
||||
# Ensure table exists.
|
||||
Base.metadata.create_all(bind=self._engine, tables=[CommandLog.__table__])
|
||||
|
||||
def _get_session(self) -> Session:
|
||||
return self._SessionLocal()
|
||||
|
||||
# -- Write ---------------------------------------------------------------
|
||||
|
||||
def log_command(
|
||||
self,
|
||||
command_input: CommandInput,
|
||||
perception: PerceptionOutput | None = None,
|
||||
outcome: str | None = None,
|
||||
) -> int:
|
||||
"""Persist a command to the log.
|
||||
|
||||
Returns the auto-generated row id.
|
||||
"""
|
||||
perception_json = perception.model_dump_json() if perception else "{}"
|
||||
cell = perception.location.cell if perception else None
|
||||
|
||||
entry = CommandLog(
|
||||
timestamp=command_input.timestamp,
|
||||
command=command_input.command.value,
|
||||
params=json.dumps(command_input.params),
|
||||
reasoning=command_input.reasoning,
|
||||
perception_snapshot=perception_json,
|
||||
outcome=outcome,
|
||||
agent_id=command_input.agent_id,
|
||||
episode_id=command_input.episode_id,
|
||||
cell=cell,
|
||||
protocol_version=command_input.protocol_version,
|
||||
)
|
||||
|
||||
session = self._get_session()
|
||||
try:
|
||||
session.add(entry)
|
||||
session.commit()
|
||||
session.refresh(entry)
|
||||
row_id: int = entry.id
|
||||
return row_id
|
||||
except Exception:
|
||||
session.rollback()
|
||||
raise
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
# -- Read ----------------------------------------------------------------
|
||||
|
||||
def query(
|
||||
self,
|
||||
*,
|
||||
command_type: str | CommandType | None = None,
|
||||
cell: str | None = None,
|
||||
episode_id: str | None = None,
|
||||
agent_id: str | None = None,
|
||||
since: datetime | None = None,
|
||||
until: datetime | None = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Query command log entries with optional filters.
|
||||
|
||||
Returns a list of dicts (serialisable to JSON).
|
||||
"""
|
||||
session = self._get_session()
|
||||
try:
|
||||
q = session.query(CommandLog)
|
||||
|
||||
if command_type is not None:
|
||||
q = q.filter(CommandLog.command == str(command_type))
|
||||
if cell is not None:
|
||||
q = q.filter(CommandLog.cell == cell)
|
||||
if episode_id is not None:
|
||||
q = q.filter(CommandLog.episode_id == episode_id)
|
||||
if agent_id is not None:
|
||||
q = q.filter(CommandLog.agent_id == agent_id)
|
||||
if since is not None:
|
||||
q = q.filter(CommandLog.timestamp >= since)
|
||||
if until is not None:
|
||||
q = q.filter(CommandLog.timestamp <= until)
|
||||
|
||||
q = q.order_by(CommandLog.timestamp.desc())
|
||||
q = q.offset(offset).limit(limit)
|
||||
|
||||
rows = q.all()
|
||||
return [self._row_to_dict(row) for row in rows]
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
# -- Export --------------------------------------------------------------
|
||||
|
||||
def export_training_data(
|
||||
self,
|
||||
output_path: str | Path,
|
||||
*,
|
||||
episode_id: str | None = None,
|
||||
since: datetime | None = None,
|
||||
until: datetime | None = None,
|
||||
) -> int:
|
||||
"""Export command log entries as a JSONL file for fine-tuning.
|
||||
|
||||
Each line is a JSON object with ``perception`` (input) and
|
||||
``command`` + ``reasoning`` (target output).
|
||||
|
||||
Returns the number of rows exported.
|
||||
"""
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
session = self._get_session()
|
||||
try:
|
||||
q = session.query(CommandLog)
|
||||
if episode_id is not None:
|
||||
q = q.filter(CommandLog.episode_id == episode_id)
|
||||
if since is not None:
|
||||
q = q.filter(CommandLog.timestamp >= since)
|
||||
if until is not None:
|
||||
q = q.filter(CommandLog.timestamp <= until)
|
||||
q = q.order_by(CommandLog.timestamp.asc())
|
||||
|
||||
count = 0
|
||||
with open(output_path, "w", encoding="utf-8") as fh:
|
||||
for row in q.yield_per(500):
|
||||
record = {
|
||||
"input": {
|
||||
"perception": json.loads(row.perception_snapshot),
|
||||
},
|
||||
"output": {
|
||||
"command": row.command,
|
||||
"params": json.loads(row.params),
|
||||
"reasoning": row.reasoning,
|
||||
},
|
||||
"metadata": {
|
||||
"timestamp": row.timestamp.isoformat() if row.timestamp else None,
|
||||
"episode_id": row.episode_id,
|
||||
"cell": row.cell,
|
||||
"outcome": row.outcome,
|
||||
},
|
||||
}
|
||||
fh.write(json.dumps(record) + "\n")
|
||||
count += 1
|
||||
logger.info("Exported %d training records to %s", count, output_path)
|
||||
return count
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
# -- Storage management --------------------------------------------------
|
||||
|
||||
def rotate(self, max_age_days: int = 90) -> int:
|
||||
"""Delete command log entries older than *max_age_days*.
|
||||
|
||||
Returns the number of rows deleted.
|
||||
"""
|
||||
cutoff = datetime.now(UTC) - timedelta(days=max_age_days)
|
||||
session = self._get_session()
|
||||
try:
|
||||
deleted = (
|
||||
session.query(CommandLog)
|
||||
.filter(CommandLog.timestamp < cutoff)
|
||||
.delete(synchronize_session=False)
|
||||
)
|
||||
session.commit()
|
||||
logger.info("Rotated %d command log entries older than %s", deleted, cutoff)
|
||||
return deleted
|
||||
except Exception:
|
||||
session.rollback()
|
||||
raise
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
def count(self) -> int:
|
||||
"""Return the total number of command log entries."""
|
||||
session = self._get_session()
|
||||
try:
|
||||
return session.query(CommandLog).count()
|
||||
finally:
|
||||
session.close()
|
||||
|
||||
# -- Helpers -------------------------------------------------------------
|
||||
|
||||
@staticmethod
|
||||
def _row_to_dict(row: CommandLog) -> dict[str, Any]:
|
||||
return {
|
||||
"id": row.id,
|
||||
"timestamp": row.timestamp.isoformat() if row.timestamp else None,
|
||||
"command": row.command,
|
||||
"params": json.loads(row.params) if row.params else {},
|
||||
"reasoning": row.reasoning,
|
||||
"perception_snapshot": json.loads(row.perception_snapshot)
|
||||
if row.perception_snapshot
|
||||
else {},
|
||||
"outcome": row.outcome,
|
||||
"agent_id": row.agent_id,
|
||||
"episode_id": row.episode_id,
|
||||
"cell": row.cell,
|
||||
"protocol_version": row.protocol_version,
|
||||
"created_at": row.created_at.isoformat() if row.created_at else None,
|
||||
}
|
||||
Reference in New Issue
Block a user