Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 23s
Smoke Test / smoke (pull_request) Failing after 19s
Validate Config / YAML Lint (pull_request) Failing after 14s
Validate Config / JSON Validate (pull_request) Successful in 15s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 50s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Cron Syntax Check (pull_request) Successful in 11s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 9s
Validate Config / Shell Script Lint (pull_request) Failing after 37s
Validate Config / Playbook Schema Validation (pull_request) Successful in 11s
Architecture Lint / Lint Repository (pull_request) Failing after 12s
PR Checklist / pr-checklist (pull_request) Successful in 4m40s
- Add bin/request_log.py instrumentation library
- log_inference(): write rows to request_log table
- query_requests(): query recent telemetry with filters (agent, provider, model, status, hours)
- did_agent_call_provider(): answer "did agent X call provider Y in last N hours?"
- get_recent_activity_summary(): aggregate stats by agent/provider/model
- ensure_db(): auto-create DB and schema if missing
- CLI interface: `python3 bin/request_log.py log|query|did-call`
- DB path: ~/.local/timmy/request_log.db (configurable via REQUEST_LOG_PATH)
- Add tests/test_request_log.py with 13 passing tests
- test_ensure_db_creates_schema: verifies table + indexes creation
- test_log_inference_inserts_row: full-field insert
- test_log_inference_minimal_fields: required fields only
- test_log_inference_error_status: error status with message
- test_query_requests_filters_by_agent: agent filter
- test_query_requests_filters_by_provider: provider filter
- test_query_requests_time_window: hours parameter
- test_did_agent_call_provider_positive/negative_wrong_agent/negative_wrong_provider
- test_did_agent_call_provider_min_success_count
- test_log_and_query_by_status: status filter
- test_get_recent_activity_summary: view aggregation
The request_log schema and ansible deployment already existed.
This commit adds the missing instrumentation that actually populates it.
Usage example for agents:
```python
from request_log import log_inference
log_inference(
    agent_name="codex-agent",
    provider="anthropic",
    model="claude-sonnet-4-20250514",
    endpoint="/v1/messages",
    tokens_in=prompt_tokens,
    tokens_out=completion_tokens,
    latency_ms=int(latency_s * 1000),
    status="success"
)
```
Query example:
```python
from request_log import did_agent_call_provider
if did_agent_call_provider("codex-agent", "anthropic", hours=1):
    print("Agent successfully called Anthropic in the last hour")
```
Closes #446
256 lines
7.3 KiB
Python
256 lines
7.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Request Log Telemetry — "Verify What Actually Happened"
|
|
|
|
Issue #446: [P2.5] request_log Telemetry Table
|
|
|
|
Every agent writes a row to request_log for every inference call.
|
|
No exceptions. No summarizing. Actual rows.
|
|
|
|
This module provides:
|
|
- log_inference(): write a telemetry row
|
|
- query_requests(): read recent telemetry
|
|
- did_agent_call_provider(): answer verification questions
|
|
|
|
Database: ~/.local/timmy/request_log.db
|
|
Override via REQUEST_LOG_PATH environment variable.
|
|
"""
|
|
|
|
import os
|
|
import sqlite3
|
|
import sys
|
|
import json
|
|
from datetime import datetime, timezone, timedelta
|
|
from pathlib import Path
|
|
from typing import Optional, Dict, Any, List
|
|
|
|
# Fallback database location used when REQUEST_LOG_PATH is not set
# (matches ansible group_vars/wizards.yml).
DEFAULT_DB_PATH = Path.home().joinpath(".local", "timmy", "request_log.db")
|
|
|
|
|
|
def get_db_path() -> Path:
    """Resolve where the request_log database lives.

    A non-empty REQUEST_LOG_PATH environment variable takes precedence
    (with a leading ``~`` expanded); otherwise DEFAULT_DB_PATH is returned.
    """
    override = os.environ.get("REQUEST_LOG_PATH")
    return Path(override).expanduser() if override else DEFAULT_DB_PATH
|
|
|
|
|
|
# DDL for the telemetry table and its lookup indexes. Every statement uses
# IF NOT EXISTS, so the script is safe to run against an existing database.
_REQUEST_LOG_SCHEMA = """
CREATE TABLE IF NOT EXISTS request_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    timestamp TEXT NOT NULL DEFAULT (datetime('now')),
    agent_name TEXT NOT NULL,
    provider TEXT NOT NULL,
    model TEXT NOT NULL,
    endpoint TEXT NOT NULL,
    tokens_in INTEGER,
    tokens_out INTEGER,
    latency_ms INTEGER,
    status TEXT NOT NULL,
    error_message TEXT
);

CREATE INDEX IF NOT EXISTS idx_request_log_agent
    ON request_log (agent_name, timestamp);

CREATE INDEX IF NOT EXISTS idx_request_log_provider
    ON request_log (provider, timestamp);

CREATE INDEX IF NOT EXISTS idx_request_log_status
    ON request_log (status, timestamp);
"""


def ensure_db(db_path: Optional[Path] = None) -> Path:
    """
    Ensure the database file and the request_log schema exist.

    The parent directory is created if needed and the (idempotent) schema
    script is always applied, so a file that exists but has no request_log
    table is repaired as well.

    Args:
        db_path: Database file to prepare. Defaults to get_db_path().

    Returns:
        The path of the prepared database.

    Raises:
        OSError: If the parent directory cannot be created.
        sqlite3.Error: If the schema cannot be applied.
    """
    target = Path(db_path) if db_path else get_db_path()
    target.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(str(target))
    try:
        conn.executescript(_REQUEST_LOG_SCHEMA)
        conn.commit()
    finally:
        # Release the handle even when the DDL fails.
        conn.close()

    return target


def log_inference(
    *,
    agent_name: str,
    provider: str,
    model: str,
    endpoint: str,
    tokens_in: Optional[int] = None,
    tokens_out: Optional[int] = None,
    latency_ms: Optional[int] = None,
    status: str = "success",
    error_message: Optional[str] = None,
    db_path: Optional[Path] = None,
) -> Optional[int]:
    """
    Log a single inference request to the request_log table.

    Telemetry is best-effort: any failure is reported through the
    ``logging`` module and None is returned, so callers never see an
    exception from this function.

    Args:
        agent_name: Name of the agent making the call
        provider: Provider name (anthropic, openrouter, ollama, etc.)
        model: Model identifier
        endpoint: API endpoint called
        tokens_in: Input token count (optional but recommended)
        tokens_out: Output token count (optional but recommended)
        latency_ms: Latency in milliseconds (optional but recommended)
        status: One of 'success', 'error', 'timeout', 'fallback'
        error_message: Error text if status is error/timeout
        db_path: Override DB path (for testing)

    Returns:
        Row ID if inserted, None on failure
    """
    db = Path(db_path) if db_path else get_db_path()

    try:
        if not db.exists():
            # Create the schema at the path we are about to write to, not at
            # the default location, so db_path overrides actually work.
            ensure_db(db)

        conn = sqlite3.connect(str(db))
        try:
            # The connection context manager commits on success and rolls
            # back on error; it does not close the connection.
            with conn:
                cursor = conn.execute(
                    """
                    INSERT INTO request_log
                        (timestamp, agent_name, provider, model, endpoint,
                         tokens_in, tokens_out, latency_ms, status, error_message)
                    VALUES (datetime('now'), ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        agent_name, provider, model, endpoint,
                        tokens_in, tokens_out, latency_ms, status, error_message,
                    ),
                )
            return cursor.lastrowid
        finally:
            conn.close()
    except Exception:
        # Never break production because telemetry failed, but leave a trace
        # so dropped rows can be diagnosed.
        import logging

        logging.getLogger(__name__).warning(
            "request_log: failed to record inference call", exc_info=True
        )
        return None
|
|
|
|
|
|
def query_requests(
    *,
    agent_name: Optional[str] = None,
    provider: Optional[str] = None,
    model: Optional[str] = None,
    hours: int = 1,
    status: Optional[str] = None,
    limit: int = 100,
    db_path: Optional[Path] = None,
) -> List[Dict[str, Any]]:
    """
    Query recent inference logs, newest first.

    Reads are best-effort: if the database file is missing an empty list is
    returned, and if the query fails the failure is reported through the
    ``logging`` module and an empty list is returned.

    Args:
        agent_name: Filter by agent name
        provider: Filter by provider
        model: Filter by model
        hours: Lookback window (default 1 hour)
        status: Filter by status ('success', 'error', etc.)
        limit: Max rows to return
        db_path: Override DB path

    Returns:
        List of matching records as dicts
    """
    db = Path(db_path) if db_path else get_db_path()

    if not db.exists():
        return []

    conditions = ["timestamp > datetime('now', '-' || ? || ' hours')"]
    params: List[Any] = [hours]

    # Column names are fixed literals; only the values are user-supplied and
    # they are always bound as parameters. Falsy filters are skipped.
    optional_filters = (
        ("agent_name", agent_name),
        ("provider", provider),
        ("model", model),
        ("status", status),
    )
    for column, value in optional_filters:
        if value:
            conditions.append(f"{column} = ?")
            params.append(value)

    where_clause = " AND ".join(conditions)
    params.append(limit)

    try:
        conn = sqlite3.connect(str(db))
        try:
            conn.row_factory = sqlite3.Row
            # Timestamps have one-second resolution, so rowid breaks ties to
            # keep "newest first" deterministic.
            cursor = conn.execute(
                f"""
                SELECT * FROM request_log
                WHERE {where_clause}
                ORDER BY timestamp DESC, rowid DESC
                LIMIT ?
                """,
                tuple(params),
            )
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # Close the handle on both the success and the failure path.
            conn.close()
    except Exception:
        # Keep the never-raise contract, but make failures visible.
        import logging

        logging.getLogger(__name__).warning(
            "request_log: query failed", exc_info=True
        )
        return []
|
|
|
|
|
|
def did_agent_call_provider(
    agent_name: str,
    provider: str,
    hours: int = 1,
    min_success_count: int = 1,
    db_path: Optional[Path] = None,
) -> bool:
    """
    Answer: "Did agent X actually call provider Y in the last N hours?"

    Args:
        agent_name: Agent whose calls are being verified
        provider: Provider the agent is expected to have called
        hours: Lookback window (default 1 hour)
        min_success_count: Number of successful calls required
        db_path: Override DB path

    Returns:
        True if the agent made at least min_success_count calls with
        status 'success' inside the window, otherwise False.
    """
    if min_success_count <= 0:
        # A threshold of zero (or less) is met by any number of rows.
        return True

    rows = query_requests(
        agent_name=agent_name,
        provider=provider,
        hours=hours,
        status="success",
        # Ask for exactly as many rows as the threshold needs. Relying on
        # query_requests' default limit of 100 made any threshold above 100
        # impossible to satisfy.
        limit=min_success_count,
        db_path=db_path,
    )
    return len(rows) >= min_success_count
|
|
|
|
|
|
def get_recent_activity_summary(
    hours: int = 1,
    db_path: Optional[Path] = None,
) -> Dict[str, Any]:
    """
    Get aggregate statistics for recent activity.

    Aggregates directly from the request_log table, grouping by agent,
    provider, model and status.

    Args:
        hours: Lookback window (default 1 hour)
        db_path: Override DB path. Defaults to get_db_path().

    Returns:
        ``{"by_agent_provider": [...]}`` where each entry has the keys
        agent_name, provider, model, status, call_count and avg_latency;
        or ``{"error": <message>}`` if the database is missing, cannot be
        opened, or the query fails. This function does not raise.
    """
    db = Path(db_path) if db_path else get_db_path()
    if not db.exists():
        return {"error": "Database not found"}

    try:
        conn = sqlite3.connect(str(db))
    except Exception:
        return {"error": "db error"}

    try:
        conn.row_factory = sqlite3.Row
        cursor = conn.execute(
            """
            SELECT agent_name, provider, model, status,
                   COUNT(*) as call_count, AVG(latency_ms) as avg_latency
            FROM request_log
            WHERE timestamp > datetime('now', '-' || ? || ' hours')
            GROUP BY agent_name, provider, model, status
            ORDER BY agent_name, provider, model, status
            """,
            (hours,),
        )
        return {"by_agent_provider": [dict(row) for row in cursor.fetchall()]}
    except Exception:
        # Broad on purpose: callers rely on getting an error dict back
        # instead of an exception.
        return {"error": "query failed"}
    finally:
        conn.close()
|