1
0
This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/spark/memory.py

314 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Spark memory — SQLite-backed event capture and memory consolidation.
Captures swarm events (tasks posted, bids, assignments, completions,
failures) and distills them into higher-level memories with importance
scoring. This is the persistence layer for Spark Intelligence.
Tables
------
spark_events — raw event log (every swarm event)
spark_memories — consolidated insights extracted from event patterns
"""
import logging
import sqlite3
import uuid
from collections.abc import Generator
from contextlib import closing, contextmanager
from dataclasses import dataclass
from datetime import UTC, datetime
from pathlib import Path
logger = logging.getLogger(__name__)
DB_PATH = Path("data/spark.db")
# Importance thresholds
IMPORTANCE_LOW = 0.3
IMPORTANCE_MEDIUM = 0.6
IMPORTANCE_HIGH = 0.8
@dataclass
class SparkEvent:
    """A single captured swarm event (one row of the spark_events table)."""

    id: str  # UUID4 string assigned at record time
    event_type: str  # task_posted, bid, assignment, completion, failure
    agent_id: str | None  # originating agent, if any
    task_id: str | None  # related task, if any
    description: str  # human-readable summary
    data: str  # JSON payload
    importance: float  # 0.0-1.0, see score_importance()
    created_at: str  # ISO-8601 UTC timestamp
@dataclass
class SparkMemory:
    """A consolidated memory distilled from event patterns (spark_memories row)."""

    id: str  # UUID4 string
    memory_type: str  # pattern, insight, anomaly
    subject: str  # agent_id or "system"
    content: str  # Human-readable insight
    confidence: float  # 0.0-1.0
    source_events: int  # How many events contributed
    created_at: str  # ISO-8601 UTC timestamp
    expires_at: str | None  # ISO-8601 expiry, or None for no expiry
@contextmanager
def _get_conn() -> Generator[sqlite3.Connection, None, None]:
    """Yield a SQLite connection with the spark schema ensured.

    Opens a fresh connection per call (closed on exit via ``closing``),
    enables WAL journaling and a 5-second busy timeout, and creates the
    tables/indexes if they do not yet exist, so callers never need a
    separate init step.
    """
    # Make sure the data/ directory exists before sqlite opens the file.
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    with closing(sqlite3.connect(str(DB_PATH))) as conn:
        conn.row_factory = sqlite3.Row  # rows addressable by column name
        conn.execute("PRAGMA journal_mode=WAL")  # readers don't block the writer
        conn.execute("PRAGMA busy_timeout=5000")  # wait up to 5s on a locked DB
        # Raw event log — one row per captured swarm event.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS spark_events (
                id TEXT PRIMARY KEY,
                event_type TEXT NOT NULL,
                agent_id TEXT,
                task_id TEXT,
                description TEXT NOT NULL DEFAULT '',
                data TEXT NOT NULL DEFAULT '{}',
                importance REAL NOT NULL DEFAULT 0.5,
                created_at TEXT NOT NULL
            )
        """)
        # Consolidated insights distilled from event patterns.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS spark_memories (
                id TEXT PRIMARY KEY,
                memory_type TEXT NOT NULL,
                subject TEXT NOT NULL DEFAULT 'system',
                content TEXT NOT NULL,
                confidence REAL NOT NULL DEFAULT 0.5,
                source_events INTEGER NOT NULL DEFAULT 0,
                created_at TEXT NOT NULL,
                expires_at TEXT
            )
        """)
        # Indexes for the filter columns used by get_events()/get_memories().
        conn.execute("CREATE INDEX IF NOT EXISTS idx_events_type ON spark_events(event_type)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_events_agent ON spark_events(agent_id)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_events_task ON spark_events(task_id)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_subject ON spark_memories(subject)")
        conn.commit()
        yield conn
# ── Importance scoring ──────────────────────────────────────────────────────
def score_importance(event_type: str, data: dict) -> float:
    """Compute an importance score in 0.0-1.0 for an event.

    Failures score highest (always worth learning from); routine bids
    score lowest; unrecognized event types fall back to 0.5. A bid above
    80 sats adds a further boost. The result is clamped to 1.0 and
    rounded to two decimals.
    """
    base_by_type = {
        "task_posted": 0.4,
        "bid_submitted": 0.2,
        "task_assigned": 0.5,
        "task_completed": 0.6,
        "task_failed": 0.9,
        "agent_joined": 0.5,
        "prediction_result": 0.7,
    }
    score = base_by_type.get(event_type, 0.5)

    # Failures always get a bump — they are the most valuable to learn from.
    if event_type == "task_failed":
        score += 0.1

    # High-value bids (> 80 sats) are more interesting than routine ones.
    sats = data.get("bid_sats", 0)
    if sats and sats > 80:
        score += 0.15

    return round(min(1.0, score), 2)
# ── Event recording ─────────────────────────────────────────────────────────
def record_event(
    event_type: str,
    description: str,
    agent_id: str | None = None,
    task_id: str | None = None,
    data: str | dict = "{}",
    importance: float | None = None,
) -> str:
    """Record a swarm event. Returns the event id.

    Parameters
    ----------
    event_type : kind of event (task_posted, bid_submitted, ...)
    description : human-readable summary
    agent_id, task_id : optional associations
    data : JSON payload as a string, or a dict (serialized automatically;
        previously a dict would score correctly but fail at insert time)
    importance : 0.0-1.0; computed via score_importance() when omitted
    """
    import json  # local import: only needed here

    event_id = str(uuid.uuid4())
    now = datetime.now(UTC).isoformat()

    # Normalize the payload once: `parsed` (dict) feeds scoring,
    # `payload` (JSON text) is what gets stored.
    if isinstance(data, str):
        payload = data
        try:
            parsed = json.loads(data)
        except json.JSONDecodeError:
            parsed = {}  # malformed JSON → score with no payload hints
    else:
        parsed = data
        payload = json.dumps(data)

    if importance is None:
        importance = score_importance(event_type, parsed)

    with _get_conn() as conn:
        conn.execute(
            """
            INSERT INTO spark_events
            (id, event_type, agent_id, task_id, description, data, importance, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            (event_id, event_type, agent_id, task_id, description, payload, importance, now),
        )
        conn.commit()

    # Bridge to the unified event log so all events are queryable from one
    # place. Best-effort: spark must keep working if event_log is unavailable.
    try:
        from swarm.event_log import EventType as _ET
        from swarm.event_log import log_event as _log

        _log(
            _ET.SYSTEM_INFO,
            source=f"spark:{event_type}",
            data={"description": description, "importance": importance, "spark_event_id": event_id},
            task_id=task_id or "",
            agent_id=agent_id or "",
        )
    except Exception as exc:
        logger.debug("Spark event log error: %s", exc)
    return event_id
def get_events(
    event_type: str | None = None,
    agent_id: str | None = None,
    task_id: str | None = None,
    limit: int = 100,
    min_importance: float = 0.0,
) -> list[SparkEvent]:
    """Query events with optional filters, newest first."""
    clauses = ["importance >= ?"]
    params: list = [min_importance]

    # Append an equality clause for each filter the caller supplied.
    for column, value in (
        ("event_type", event_type),
        ("agent_id", agent_id),
        ("task_id", task_id),
    ):
        if value:
            clauses.append(f"{column} = ?")
            params.append(value)

    sql = (
        "SELECT * FROM spark_events WHERE "
        + " AND ".join(clauses)
        + " ORDER BY created_at DESC LIMIT ?"
    )
    params.append(limit)

    with _get_conn() as conn:
        rows = conn.execute(sql, params).fetchall()

    columns = (
        "id", "event_type", "agent_id", "task_id",
        "description", "data", "importance", "created_at",
    )
    return [SparkEvent(**{c: row[c] for c in columns}) for row in rows]
def count_events(event_type: str | None = None) -> int:
    """Count events, optionally filtered by type."""
    if event_type:
        sql = "SELECT COUNT(*) FROM spark_events WHERE event_type = ?"
        args: tuple = (event_type,)
    else:
        sql = "SELECT COUNT(*) FROM spark_events"
        args = ()
    with _get_conn() as conn:
        return conn.execute(sql, args).fetchone()[0]
# ── Memory consolidation ───────────────────────────────────────────────────
def store_memory(
    memory_type: str,
    subject: str,
    content: str,
    confidence: float = 0.5,
    source_events: int = 0,
    expires_at: str | None = None,
) -> str:
    """Store a consolidated memory. Returns the memory id."""
    memory_id = str(uuid.uuid4())
    created = datetime.now(UTC).isoformat()
    row = (
        memory_id, memory_type, subject, content,
        confidence, source_events, created, expires_at,
    )
    with _get_conn() as conn:
        conn.execute(
            """
            INSERT INTO spark_memories
            (id, memory_type, subject, content, confidence, source_events, created_at, expires_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """,
            row,
        )
        conn.commit()
    return memory_id
def get_memories(
    memory_type: str | None = None,
    subject: str | None = None,
    min_confidence: float = 0.0,
    limit: int = 50,
) -> list[SparkMemory]:
    """Query memories with optional filters, newest first."""
    clauses = ["confidence >= ?"]
    params: list = [min_confidence]

    # Append an equality clause for each filter the caller supplied.
    for column, value in (("memory_type", memory_type), ("subject", subject)):
        if value:
            clauses.append(f"{column} = ?")
            params.append(value)

    sql = (
        "SELECT * FROM spark_memories WHERE "
        + " AND ".join(clauses)
        + " ORDER BY created_at DESC LIMIT ?"
    )
    params.append(limit)

    with _get_conn() as conn:
        rows = conn.execute(sql, params).fetchall()

    columns = (
        "id", "memory_type", "subject", "content",
        "confidence", "source_events", "created_at", "expires_at",
    )
    return [SparkMemory(**{c: row[c] for c in columns}) for row in rows]
def count_memories(memory_type: str | None = None) -> int:
    """Count memories, optionally filtered by type."""
    if memory_type:
        sql = "SELECT COUNT(*) FROM spark_memories WHERE memory_type = ?"
        args: tuple = (memory_type,)
    else:
        sql = "SELECT COUNT(*) FROM spark_memories"
        args = ()
    with _get_conn() as conn:
        return conn.execute(sql, args).fetchone()[0]