forked from Rockachopa/Timmy-time-dashboard

Implement two foundational infrastructure pieces for the Morrowind integration:

1. Perception/Command Protocol (Issue #859):
   - Formal spec document with JSON schemas, API contracts, versioning strategy
   - Engine-agnostic design following the Falsework Rule
   - Pydantic v2 models (PerceptionOutput, CommandInput) for runtime validation

2. SQLite Command Log + Training Pipeline (Issue #855):
   - SQLAlchemy model for command_log table with full indexing
   - CommandLogger class with log_command(), query(), export_training_data()
   - TrainingExporter with chat-completion, episode, and instruction formats
   - Storage management (rotation/archival) utilities
   - Alembic migration for the new table

Includes 39 passing tests covering schema validation, logging, querying, and export.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

308 lines · 10 KiB · Python
"""SQLite command log for the Morrowind Perception/Command protocol.

Every heartbeat cycle is logged — the resulting dataset serves as organic
training data for local model fine-tuning (Phase 7+).

Usage::

    from infrastructure.morrowind.command_log import CommandLogger

    logger = CommandLogger()  # uses project default DB
    logger.log_command(command_input, perception_snapshot)
    results = logger.query(command_type="move_to", limit=100)
    logger.export_training_data("export.jsonl")
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from datetime import UTC, datetime, timedelta
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from sqlalchemy import (
|
|
Column,
|
|
DateTime,
|
|
Index,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
create_engine,
|
|
)
|
|
from sqlalchemy.orm import Session, sessionmaker
|
|
|
|
from src.dashboard.models.database import Base
|
|
|
|
from .schemas import CommandInput, CommandType, PerceptionOutput
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Default database path — same SQLite file as the rest of the project.
# Relative to the process working directory; CommandLogger creates the
# parent directory on first use.
DEFAULT_DB_PATH = Path("./data/timmy_calm.db")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SQLAlchemy model
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class CommandLog(Base):
    """Persisted command log entry.

    Schema columns mirror the requirements from Issue #855:
    timestamp, command, params, reasoning, perception_snapshot,
    outcome, episode_id.
    """

    __tablename__ = "command_log"

    # Surrogate primary key; returned by CommandLogger.log_command().
    id = Column(Integer, primary_key=True, autoincrement=True)

    # When the command was issued (UTC). Indexed for time-range queries.
    timestamp = Column(
        DateTime, nullable=False, default=lambda: datetime.now(UTC), index=True
    )
    # Command name — log_command() stores CommandType.value here.
    command = Column(String(64), nullable=False, index=True)
    # JSON-encoded command parameters (json.dumps of CommandInput.params).
    params = Column(Text, nullable=False, default="{}")
    # Free-text reasoning copied from CommandInput.reasoning.
    reasoning = Column(Text, nullable=False, default="")

    # JSON-encoded PerceptionOutput snapshot at command time ("{}" if absent).
    perception_snapshot = Column(Text, nullable=False, default="{}")
    # Optional free-text outcome recorded after execution.
    outcome = Column(Text, nullable=True)

    agent_id = Column(String(64), nullable=False, default="timmy", index=True)
    episode_id = Column(String(128), nullable=True, index=True)
    # Game cell taken from perception.location.cell, when a snapshot exists.
    cell = Column(String(255), nullable=True, index=True)
    protocol_version = Column(String(16), nullable=False, default="1.0.0")

    # Row insertion time (UTC); may differ from `timestamp` above.
    created_at = Column(
        DateTime, nullable=False, default=lambda: datetime.now(UTC)
    )

    # Composite indexes for the common query patterns:
    # command-within-cell lookups, and chronological episode replay.
    __table_args__ = (
        Index("ix_command_log_cmd_cell", "command", "cell"),
        Index("ix_command_log_episode", "episode_id", "timestamp"),
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CommandLogger — high-level API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class CommandLogger:
    """High-level interface for logging, querying, and exporting commands.

    Wraps the ``command_log`` table with write (:meth:`log_command`),
    read (:meth:`query`), export (:meth:`export_training_data`) and
    storage-maintenance (:meth:`rotate`, :meth:`count`) operations.
    Each public method opens and closes its own session.

    Args:
        db_url: SQLAlchemy database URL. Defaults to the project SQLite path.
    """

    def __init__(self, db_url: str | None = None) -> None:
        if db_url is None:
            # Create ./data lazily so a fresh checkout works out of the box.
            DEFAULT_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
            db_url = f"sqlite:///{DEFAULT_DB_PATH}"
        # FIX: check_same_thread is a SQLite-only DBAPI argument; passing it
        # to other drivers (e.g. psycopg2) raises TypeError, so gate on URL.
        connect_args = (
            {"check_same_thread": False} if db_url.startswith("sqlite") else {}
        )
        self._engine = create_engine(db_url, connect_args=connect_args)
        self._SessionLocal = sessionmaker(
            autocommit=False, autoflush=False, bind=self._engine
        )
        # Ensure only this table exists — avoid touching unrelated metadata.
        Base.metadata.create_all(bind=self._engine, tables=[CommandLog.__table__])

    def _get_session(self) -> Session:
        """Return a fresh session bound to this logger's engine."""
        return self._SessionLocal()

    # -- Write ---------------------------------------------------------------

    def log_command(
        self,
        command_input: CommandInput,
        perception: PerceptionOutput | None = None,
        outcome: str | None = None,
    ) -> int:
        """Persist a command to the log.

        Args:
            command_input: Validated command to record.
            perception: Optional perception snapshot captured with the
                command; serialised to JSON ("{}" when absent).
            outcome: Optional free-text result of executing the command.

        Returns:
            The auto-generated row id.
        """
        perception_json = perception.model_dump_json() if perception else "{}"
        cell = perception.location.cell if perception else None

        entry = CommandLog(
            timestamp=command_input.timestamp,
            command=command_input.command.value,
            params=json.dumps(command_input.params),
            reasoning=command_input.reasoning,
            perception_snapshot=perception_json,
            outcome=outcome,
            agent_id=command_input.agent_id,
            episode_id=command_input.episode_id,
            cell=cell,
            protocol_version=command_input.protocol_version,
        )

        session = self._get_session()
        try:
            session.add(entry)
            session.commit()
            session.refresh(entry)  # populate the autoincrement id
            row_id: int = entry.id
            return row_id
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    # -- Read ----------------------------------------------------------------

    def query(
        self,
        *,
        command_type: str | CommandType | None = None,
        cell: str | None = None,
        episode_id: str | None = None,
        agent_id: str | None = None,
        since: datetime | None = None,
        until: datetime | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[dict[str, Any]]:
        """Query command log entries with optional filters.

        Args:
            command_type: Command name string or a ``CommandType`` member.
            cell: Exact cell-name filter.
            episode_id: Exact episode filter.
            agent_id: Exact agent filter.
            since: Inclusive lower timestamp bound.
            until: Inclusive upper timestamp bound.
            limit: Maximum rows returned (newest first).
            offset: Pagination offset.

        Returns:
            A list of dicts (serialisable to JSON), newest first.
        """
        session = self._get_session()
        try:
            q = session.query(CommandLog)

            if command_type is not None:
                # BUG FIX: str() of a plain Enum member yields
                # "CommandType.X", which never matches the stored
                # `.value` written by log_command — unwrap .value instead.
                command_value = getattr(command_type, "value", command_type)
                q = q.filter(CommandLog.command == str(command_value))
            if cell is not None:
                q = q.filter(CommandLog.cell == cell)
            if episode_id is not None:
                q = q.filter(CommandLog.episode_id == episode_id)
            if agent_id is not None:
                q = q.filter(CommandLog.agent_id == agent_id)
            if since is not None:
                q = q.filter(CommandLog.timestamp >= since)
            if until is not None:
                q = q.filter(CommandLog.timestamp <= until)

            q = q.order_by(CommandLog.timestamp.desc())
            q = q.offset(offset).limit(limit)

            return [self._row_to_dict(row) for row in q.all()]
        finally:
            session.close()

    # -- Export --------------------------------------------------------------

    def export_training_data(
        self,
        output_path: str | Path,
        *,
        episode_id: str | None = None,
        since: datetime | None = None,
        until: datetime | None = None,
    ) -> int:
        """Export command log entries as a JSONL file for fine-tuning.

        Each line is a JSON object with ``input.perception`` (model input)
        and ``output.command``/``params``/``reasoning`` (target output),
        plus a ``metadata`` object. Rows are streamed oldest-first in
        batches of 500 to bound memory use.

        Returns:
            The number of rows exported.
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        session = self._get_session()
        try:
            q = session.query(CommandLog)
            if episode_id is not None:
                q = q.filter(CommandLog.episode_id == episode_id)
            if since is not None:
                q = q.filter(CommandLog.timestamp >= since)
            if until is not None:
                q = q.filter(CommandLog.timestamp <= until)
            q = q.order_by(CommandLog.timestamp.asc())

            count = 0
            with open(output_path, "w", encoding="utf-8") as fh:
                for row in q.yield_per(500):
                    # Guard empty/NULL text columns (mirrors _row_to_dict)
                    # so one malformed row cannot abort the whole export.
                    record = {
                        "input": {
                            "perception": json.loads(row.perception_snapshot)
                            if row.perception_snapshot
                            else {},
                        },
                        "output": {
                            "command": row.command,
                            "params": json.loads(row.params)
                            if row.params
                            else {},
                            "reasoning": row.reasoning,
                        },
                        "metadata": {
                            "timestamp": row.timestamp.isoformat()
                            if row.timestamp
                            else None,
                            "episode_id": row.episode_id,
                            "cell": row.cell,
                            "outcome": row.outcome,
                        },
                    }
                    fh.write(json.dumps(record) + "\n")
                    count += 1
            logger.info("Exported %d training records to %s", count, output_path)
            return count
        finally:
            session.close()

    # -- Storage management --------------------------------------------------

    def rotate(self, max_age_days: int = 90) -> int:
        """Delete command log entries older than *max_age_days*.

        Returns:
            The number of rows deleted.
        """
        cutoff = datetime.now(UTC) - timedelta(days=max_age_days)
        session = self._get_session()
        try:
            deleted = (
                session.query(CommandLog)
                .filter(CommandLog.timestamp < cutoff)
                .delete(synchronize_session=False)
            )
            session.commit()
            logger.info(
                "Rotated %d command log entries older than %s", deleted, cutoff
            )
            return deleted
        except Exception:
            session.rollback()
            raise
        finally:
            session.close()

    def count(self) -> int:
        """Return the total number of command log entries."""
        session = self._get_session()
        try:
            return session.query(CommandLog).count()
        finally:
            session.close()

    # -- Helpers -------------------------------------------------------------

    @staticmethod
    def _row_to_dict(row: CommandLog) -> dict[str, Any]:
        """Convert an ORM row into a JSON-serialisable dict."""
        return {
            "id": row.id,
            "timestamp": row.timestamp.isoformat() if row.timestamp else None,
            "command": row.command,
            "params": json.loads(row.params) if row.params else {},
            "reasoning": row.reasoning,
            "perception_snapshot": json.loads(row.perception_snapshot)
            if row.perception_snapshot
            else {},
            "outcome": row.outcome,
            "agent_id": row.agent_id,
            "episode_id": row.episode_id,
            "cell": row.cell,
            "protocol_version": row.protocol_version,
            "created_at": row.created_at.isoformat() if row.created_at else None,
        }
|