Files
timmy-tower/artifacts/api-server/src/lib/metrics.ts
Alexander Whitestone 2e4c3df509
Some checks failed
CI / Typecheck & Lint (pull_request) Failing after 0s
feat: add API observability — request IDs, log filtering, HTTP counters
- Add request-id middleware: assigns X-Request-Id (reuses upstream header
  or generates UUID), stored on res.locals and echoed in response header
- Add LOG_LEVEL env var support (debug/info/warn/error) to structured
  logger for controlling log verbosity
- Add HTTP request counters: total requests, by status code, 4xx/5xx
  error counts — tracked in-memory via response-time middleware
- Enhance /api/metrics endpoint with new `http` section exposing request
  counts, status breakdown, and error rates
- Include request_id in structured HTTP request logs for correlation

Fixes #57

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-22 21:05:42 -04:00

122 lines
3.3 KiB
TypeScript

import { db, jobs, invoices } from "@workspace/db";
import { sql } from "drizzle-orm";
import { latencyHistogram, type BucketStats } from "./histogram.js";
import { requestCounters, type RequestCountsSnapshot } from "./request-counters.js";
/**
 * Job counts grouped into the operational states reported by the metrics
 * snapshot. `MetricsService.snapshot()` folds several raw database states
 * into some of these buckets.
 */
export interface JobStateCounts {
  /** Jobs in the raw `awaiting_eval_payment` or `evaluating` state. */
  awaiting_eval: number;
  /** Jobs in the raw `awaiting_work_payment` or `executing` state. */
  awaiting_work: number;
  /** Jobs in the raw `complete` state. */
  complete: number;
  /** Jobs in the raw `rejected` state. */
  rejected: number;
  /** Jobs in the raw `failed` state. */
  failed: number;
}
/**
 * Shape of the object returned by `MetricsService.snapshot()`.
 */
export interface MetricsSnapshot {
  /** Whole seconds elapsed since this module was loaded. */
  uptime_s: number;

  /** HTTP request counters, as reported by `requestCounters.snapshot()`. */
  http: RequestCountsSnapshot;

  /** Job counts read from the `jobs` table. */
  jobs: {
    /** Sum of the per-state counts across every raw state returned by the query. */
    total: number;
    /** Counts grouped into operational state buckets. */
    by_state: JobStateCounts;
  };

  /** Invoice counts read from the `invoices` table. */
  invoices: {
    /** Number of invoice rows. */
    total: number;
    /** Number of invoice rows whose `paid` flag is true. */
    paid: number;
    /** `paid / total`, or `null` when there are no invoices. */
    conversion_rate: number | null;
  };

  /** Earnings aggregated from the `jobs` table. */
  earnings: {
    /** Sum of `actual_amount_sats` over all jobs (0 when there are none). */
    total_sats: number;
  };

  /** Latency statistics taken from the latency histogram snapshot. */
  latency: {
    /** Stats stored under the `eval_phase` key, or `null` if absent. */
    eval_phase: BucketStats | null;
    /** Stats stored under the `work_phase` key, or `null` if absent. */
    work_phase: BucketStats | null;
    /** Stats for every other histogram key, indexed by that key. */
    routes: Record<string, BucketStats>;
  };
}
/**
 * Epoch-millisecond timestamp captured when this module is evaluated.
 * `MetricsService.snapshot()` subtracts it from the current time to report
 * `uptime_s`.
 */
const START_TIME = Date.now();
/**
 * Builds point-in-time metrics snapshots by combining aggregate database
 * queries with the latency histogram and the HTTP request counters.
 */
export class MetricsService {
  /**
   * Collects the current metrics.
   *
   * Runs three aggregate queries concurrently (job counts per raw state,
   * invoice totals, and the sum of `actual_amount_sats`), then merges in the
   * latency histogram and request-counter snapshots.
   *
   * @returns The assembled snapshot. `invoices.conversion_rate` is `null`
   *   when there are no invoices; `latency.eval_phase` and
   *   `latency.work_phase` are `null` when the histogram has no entry under
   *   that key.
   * @throws Rejects if any of the database queries rejects.
   */
  async snapshot(): Promise<MetricsSnapshot> {
    const [jobsByState, invoiceCounts, earningsRow] = await Promise.all([
      db
        .select({
          state: jobs.state,
          count: sql<number>`cast(count(*) as int)`,
        })
        .from(jobs)
        .groupBy(jobs.state),
      db
        .select({
          total: sql<number>`cast(count(*) as int)`,
          // sum() over zero rows is NULL; coalesce keeps the declared
          // `number` type honest for an empty invoices table.
          paid: sql<number>`cast(coalesce(sum(case when paid then 1 else 0 end), 0) as int)`,
        })
        .from(invoices),
      db
        .select({
          // Cast to bigint rather than int: a cumulative sats total can exceed
          // the 32-bit range (2,147,483,647 sats is roughly 21.47 BTC), and an
          // out-of-range cast to int would fail the whole snapshot.
          // NOTE(review): assumes a PostgreSQL-style backend where `int` is
          // 32-bit and 64-bit results may arrive as strings — confirm. The
          // value is normalised with Number() below either way.
          total_sats: sql<number | string>`cast(coalesce(sum(actual_amount_sats), 0) as bigint)`,
        })
        .from(jobs),
    ]);

    // Group raw DB states into operational state keys.
    const rawCounts = new Map<string, number>();
    let jobsTotal = 0;
    for (const row of jobsByState) {
      const n = Number(row.count);
      rawCounts.set(row.state, (rawCounts.get(row.state) ?? 0) + n);
      // The total covers every raw state, including ones not mapped below.
      jobsTotal += n;
    }
    const countOf = (state: string): number => rawCounts.get(state) ?? 0;

    const byState: JobStateCounts = {
      awaiting_eval: countOf("awaiting_eval_payment") + countOf("evaluating"),
      awaiting_work: countOf("awaiting_work_payment") + countOf("executing"),
      complete: countOf("complete"),
      rejected: countOf("rejected"),
      failed: countOf("failed"),
    };

    const invRow = invoiceCounts[0] ?? { total: 0, paid: 0 };
    const invTotal = Number(invRow.total);
    const invPaid = Number(invRow.paid);
    // Avoid dividing by zero: no invoices means the rate is undefined.
    const conversionRate = invTotal > 0 ? invPaid / invTotal : null;

    const totalSats = Number(earningsRow[0]?.total_sats ?? 0);

    // The histogram keeps the two phase timings alongside per-route entries;
    // report the phases separately and everything else under `routes`.
    const allRoutes = latencyHistogram.snapshot();
    const evalPhase = allRoutes["eval_phase"] ?? null;
    const workPhase = allRoutes["work_phase"] ?? null;
    const routeLatency: Record<string, BucketStats> = Object.fromEntries(
      Object.entries(allRoutes).filter(
        ([key]) => key !== "eval_phase" && key !== "work_phase",
      ),
    );

    return {
      uptime_s: Math.floor((Date.now() - START_TIME) / 1000),
      http: requestCounters.snapshot(),
      jobs: {
        total: jobsTotal,
        by_state: byState,
      },
      invoices: {
        total: invTotal,
        paid: invPaid,
        conversion_rate: conversionRate,
      },
      earnings: {
        total_sats: totalSats,
      },
      latency: {
        eval_phase: evalPhase,
        work_phase: workPhase,
        routes: routeLatency,
      },
    };
  }
}
/** Shared module-level `MetricsService` instance, created when this module is loaded. */
export const metricsService = new MetricsService();