#!/usr/bin/env python3
"""
Request Log Telemetry — "Verify What Actually Happened"

Issue #446: [P2.5] request_log Telemetry Table

Every agent writes a row to request_log for every inference call.
No exceptions. No summarizing. Actual rows.

This module provides:
- log_inference(): write a telemetry row
- query_requests(): read recent telemetry
- did_agent_call_provider(): answer verification questions
- get_recent_activity_summary(): aggregate recent calls per agent/provider

Database: ~/.local/timmy/request_log.db
Override via REQUEST_LOG_PATH environment variable.
"""

import json
import logging
import os
import sqlite3
import sys
from contextlib import closing
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Optional, Dict, Any, List

logger = logging.getLogger(__name__)

# Default DB location (matches ansible group_vars/wizards.yml)
DEFAULT_DB_PATH = Path.home() / ".local" / "timmy" / "request_log.db"

# Every statement is IF NOT EXISTS, so the script is safe to re-run against
# a database that is already (fully or partially) initialised.
_SCHEMA = """
CREATE TABLE IF NOT EXISTS request_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp TEXT NOT NULL DEFAULT (datetime('now')),
    agent_name TEXT NOT NULL,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    tokens_in INTEGER,
    tokens_out INTEGER,
    latency_ms INTEGER,
    status TEXT NOT NULL,
    error_message TEXT
);
CREATE INDEX IF NOT EXISTS idx_request_log_agent
    ON request_log (agent_name, timestamp);
CREATE INDEX IF NOT EXISTS idx_request_log_provider
    ON request_log (provider, timestamp);
CREATE INDEX IF NOT EXISTS idx_request_log_status
    ON request_log (status, timestamp);
"""

_INSERT_SQL = """
INSERT INTO request_log
    (timestamp, agent_name, provider, model, endpoint,
     tokens_in, tokens_out, latency_ms, status, error_message)
VALUES (datetime('now'), ?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

_SUMMARY_SQL = """
SELECT agent_name, provider, model, status,
       COUNT(*) as call_count,
       AVG(latency_ms) as avg_latency
FROM request_log
WHERE timestamp > datetime('now', '-' || ? || ' hours')
GROUP BY agent_name, provider, model, status
"""


def get_db_path() -> Path:
    """Return the configured request_log database path.

    The REQUEST_LOG_PATH environment variable (with ``~`` expanded) takes
    precedence over DEFAULT_DB_PATH.
    """
    env_path = os.environ.get("REQUEST_LOG_PATH")
    if env_path:
        return Path(env_path).expanduser()
    return DEFAULT_DB_PATH


def _resolve_db_path(db_path: Optional[Path]) -> Path:
    """Return the explicit override if given, else the configured path."""
    return Path(db_path) if db_path else get_db_path()


def ensure_db(db_path: Optional[Path] = None) -> Path:
    """
    Ensure the database file, its parent directory and the schema exist.

    The schema script is applied unconditionally: it is idempotent, and
    running it always repairs a file that exists but has no tables yet
    (e.g. created empty, or left behind by an interrupted first run).

    Args:
        db_path: Database to initialise. Defaults to get_db_path().

    Returns:
        The path of the initialised database.

    Raises:
        OSError: if the parent directory cannot be created.
        sqlite3.Error: if the database cannot be opened or initialised.
    """
    db = _resolve_db_path(db_path)
    db.parent.mkdir(parents=True, exist_ok=True)
    # closing() guarantees the handle is released even if the DDL fails;
    # sqlite3's own context manager only manages the transaction.
    with closing(sqlite3.connect(str(db))) as conn:
        conn.executescript(_SCHEMA)
        conn.commit()
    return db


def _insert_row(db: Path, values: tuple) -> Optional[int]:
    """Insert one request_log row into *db* and return its row id."""
    with closing(sqlite3.connect(str(db))) as conn:
        with conn:  # commit on success, roll back on error
            return conn.execute(_INSERT_SQL, values).lastrowid


def log_inference(
    *,
    agent_name: str,
    provider: str,
    model: str,
    endpoint: str,
    tokens_in: Optional[int] = None,
    tokens_out: Optional[int] = None,
    latency_ms: Optional[int] = None,
    status: str = "success",
    error_message: Optional[str] = None,
    db_path: Optional[Path] = None,
) -> Optional[int]:
    """
    Log a single inference request to the request_log table.

    The database (including a ``db_path`` override) is created on demand.
    This function never raises: telemetry must not break the caller.

    Args:
        agent_name: Name of the agent making the call
        provider: Provider name (anthropic, openrouter, ollama, etc.)
        model: Model identifier
        endpoint: API endpoint called
        tokens_in: Input token count (optional but recommended)
        tokens_out: Output token count (optional but recommended)
        latency_ms: Latency in milliseconds (optional but recommended)
        status: One of 'success', 'error', 'timeout', 'fallback'
        error_message: Error text if status is error/timeout
        db_path: Override DB path (for testing)

    Returns:
        Row ID if inserted, None on failure
    """
    db = _resolve_db_path(db_path)
    values = (
        agent_name, provider, model, endpoint,
        tokens_in, tokens_out, latency_ms, status, error_message,
    )
    try:
        if not db.exists():
            # Initialise the database we are about to write to, not the
            # default one, so overrides work on a fresh path.
            ensure_db(db)
        try:
            return _insert_row(db, values)
        except sqlite3.OperationalError:
            # Most likely the file exists without the table; build the
            # schema and retry once. Any other cause fails again below.
            ensure_db(db)
            return _insert_row(db, values)
    except Exception:
        # Never break production — swallow errors for telemetry, but leave
        # a trace for anyone who enables debug logging.
        logger.debug("request_log insert failed for %s", db, exc_info=True)
        return None


def query_requests(
    *,
    agent_name: Optional[str] = None,
    provider: Optional[str] = None,
    model: Optional[str] = None,
    hours: int = 1,
    status: Optional[str] = None,
    limit: int = 100,
    db_path: Optional[Path] = None,
) -> List[Dict[str, Any]]:
    """
    Query recent inference logs, newest first.

    Args:
        agent_name: Filter by agent name
        provider: Filter by provider
        model: Filter by model
        hours: Lookback window (default 1 hour)
        status: Filter by status ('success', 'error', etc.)
        limit: Max rows to return
        db_path: Override DB path

    Returns:
        List of matching records as dicts. Empty if the database does not
        exist or the query fails (best-effort read; failures are logged
        at debug level).
    """
    db = _resolve_db_path(db_path)
    if not db.exists():
        return []

    conditions = ["timestamp > datetime('now', '-' || ? || ' hours')"]
    params: List[Any] = [hours]
    # Column names are fixed literals; only the values are bound, so the
    # f-string below never interpolates caller-supplied text.
    filters = (
        ("agent_name", agent_name),
        ("provider", provider),
        ("model", model),
        ("status", status),
    )
    for column, value in filters:
        if value:
            conditions.append(f"{column} = ?")
            params.append(value)
    where_clause = " AND ".join(conditions)

    try:
        with closing(sqlite3.connect(str(db))) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                f"""
                SELECT * FROM request_log
                WHERE {where_clause}
                ORDER BY timestamp DESC
                LIMIT ?
                """,
                (*params, limit),
            )
            return [dict(row) for row in cursor.fetchall()]
    except Exception:
        logger.debug("request_log query failed for %s", db, exc_info=True)
        return []


def did_agent_call_provider(
    agent_name: str,
    provider: str,
    hours: int = 1,
    min_success_count: int = 1,
    db_path: Optional[Path] = None,
) -> bool:
    """
    Answer: "Did agent X actually call provider Y in the last N hours?"

    Returns True if agent made at least min_success_count successful calls.
    """
    rows = query_requests(
        agent_name=agent_name,
        provider=provider,
        hours=hours,
        status="success",
        # Fetch exactly as many rows as we need to count; the default limit
        # of 100 would make any threshold above 100 unreachable.
        limit=max(min_success_count, 1),
        db_path=db_path,
    )
    return len(rows) >= min_success_count


def get_recent_activity_summary(
    hours: int = 1,
    db_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """
    Get aggregate statistics for recent activity.

    Groups request_log rows from the last ``hours`` hours by
    (agent_name, provider, model, status) and reports the call count and
    average latency for each group.

    Args:
        hours: Lookback window (default 1 hour)
        db_path: Override DB path

    Returns:
        ``{"by_agent_provider": [...]}`` on success, otherwise a dict with
        a single ``"error"`` key describing what went wrong.
    """
    db = _resolve_db_path(db_path)
    if not db.exists():
        return {"error": "Database not found"}

    try:
        conn = sqlite3.connect(str(db))
    except Exception:
        return {"error": "db error"}

    with closing(conn):
        try:
            conn.row_factory = sqlite3.Row
            rows = [dict(row) for row in conn.execute(_SUMMARY_SQL, (hours,))]
        except Exception:
            return {"error": "query failed"}
    return {"by_agent_provider": rows}