fix(testkit): macOS compat + fix test 8c ordering (#24)

This commit is contained in:
2026-03-18 21:01:13 -04:00
parent ca94c0a9e5
commit 83a2ec19e2
59 changed files with 4458 additions and 454 deletions

View File

@@ -1,14 +1,47 @@
import express, { type Express } from "express";
import cors from "cors";
import router from "./routes";
import router from "./routes/index.js";
import { responseTimeMiddleware } from "./middlewares/response-time.js";
const app: Express = express();
app.set("trust proxy", 1);
app.use(cors());
// ── CORS (#5) ────────────────────────────────────────────────────────────────
// CORS_ORIGINS = comma-separated list of allowed origins.
// Default in production: alexanderwhitestone.com (and www. variant).
// Default in development: all origins permitted.
const isProd = process.env["NODE_ENV"] === "production";
const rawOrigins = process.env["CORS_ORIGINS"];
const allowedOrigins: string[] = rawOrigins
? rawOrigins.split(",").map((o) => o.trim()).filter(Boolean)
: isProd
? ["https://alexanderwhitestone.com", "https://www.alexanderwhitestone.com"]
: [];
app.use(
cors({
origin:
allowedOrigins.length === 0
? true
: (origin, callback) => {
if (!origin || allowedOrigins.includes(origin)) {
callback(null, true);
} else {
callback(new Error(`CORS: origin '${origin}' not allowed`));
}
},
credentials: true,
methods: ["GET", "POST", "PATCH", "DELETE", "OPTIONS"],
allowedHeaders: ["Content-Type", "Authorization", "X-Session-Token"],
exposedHeaders: ["X-Session-Token"],
}),
);
app.use(express.json());
app.use(express.urlencoded({ extended: true }));
app.use(responseTimeMiddleware);
app.use("/api", router);

View File

@@ -1,4 +1,5 @@
import app from "./app";
import { rootLogger } from "./lib/logger.js";
const rawPort = process.env["PORT"];
@@ -15,9 +16,9 @@ if (Number.isNaN(port) || port <= 0) {
}
app.listen(port, () => {
console.log(`Server listening on port ${port}`);
rootLogger.info("server started", { port });
const domain = process.env["REPLIT_DEV_DOMAIN"];
if (domain) {
console.log(`Public UI: https://${domain}/api/ui`);
rootLogger.info("public url", { url: `https://${domain}/api/ui` });
}
});

View File

@@ -1,4 +1,6 @@
import { anthropic } from "@workspace/integrations-anthropic-ai";
import { makeLogger } from "./logger.js";
const logger = makeLogger("agent");
export interface EvalResult {
accepted: boolean;
@@ -18,17 +20,79 @@ export interface AgentConfig {
workModel?: string;
}
// ── Stub mode detection ───────────────────────────────────────────────────────
// If Anthropic credentials are absent, all AI calls return canned responses so
// the server starts and exercises the full payment/state-machine flow without
// a real API key. This mirrors the LNbits stub pattern.
const STUB_MODE =
!process.env["AI_INTEGRATIONS_ANTHROPIC_API_KEY"] ||
!process.env["AI_INTEGRATIONS_ANTHROPIC_BASE_URL"];
if (STUB_MODE) {
logger.warn("no Anthropic key — running in STUB mode", { component: "agent", stub: true });
}
const STUB_EVAL: EvalResult = {
accepted: true,
reason: "Stub: request accepted for processing.",
inputTokens: 0,
outputTokens: 0,
};
const STUB_RESULT =
"Stub response: Timmy is running in stub mode (no Anthropic API key). " +
"Configure AI_INTEGRATIONS_ANTHROPIC_API_KEY to enable real AI responses.";
// ── Lazy client ───────────────────────────────────────────────────────────────
// Minimal local interface — avoids importing @anthropic-ai/sdk types directly.
// Dynamic import avoids the module-level throw in the integrations client when
// env vars are absent (the client.ts guard runs at module evaluation time).
interface AnthropicLike {
  messages: {
    // Non-streaming completion call: resolves with content blocks + usage.
    create(params: Record<string, unknown>): Promise<{
      content: Array<{ type: string; text?: string }>;
      usage: { input_tokens: number; output_tokens: number };
    }>;
    // Streaming completion call: yields SSE-style events (delta / usage /
    // message-start shapes are the only fields this module reads).
    stream(params: Record<string, unknown>): AsyncIterable<{
      type: string;
      delta?: { type: string; text?: string };
      usage?: { output_tokens: number };
      message?: { usage: { input_tokens: number } };
    }>;
  };
}

// Memoized client instance; populated by the first getClient() call and
// reused for the lifetime of the process.
let _anthropic: AnthropicLike | null = null;

// Resolve the Anthropic client lazily. The dynamic import defers the
// integrations package's module-level env-var guard until a real AI call is
// actually made (STUB_MODE short-circuits before reaching here).
async function getClient(): Promise<AnthropicLike> {
  if (_anthropic) return _anthropic;
  // @ts-expect-error -- TS6305: integrations-anthropic-ai exports src directly; project-reference build not required at runtime
  const mod = (await import("@workspace/integrations-anthropic-ai")) as { anthropic: AnthropicLike };
  _anthropic = mod.anthropic;
  return _anthropic;
}
// ── AgentService ─────────────────────────────────────────────────────────────
export class AgentService {
readonly evalModel: string;
readonly workModel: string;
readonly stubMode: boolean = STUB_MODE;
constructor(config?: AgentConfig) {
this.evalModel = config?.evalModel ?? process.env.EVAL_MODEL ?? "claude-haiku-4-5";
this.workModel = config?.workModel ?? process.env.WORK_MODEL ?? "claude-sonnet-4-6";
this.evalModel = config?.evalModel ?? process.env["EVAL_MODEL"] ?? "claude-haiku-4-5";
this.workModel = config?.workModel ?? process.env["WORK_MODEL"] ?? "claude-sonnet-4-6";
}
async evaluateRequest(requestText: string): Promise<EvalResult> {
const message = await anthropic.messages.create({
if (STUB_MODE) {
// Simulate a short eval delay so state-machine tests are realistic
await new Promise((r) => setTimeout(r, 300));
return { ...STUB_EVAL };
}
const client = await getClient();
const message = await client.messages.create({
model: this.evalModel,
max_tokens: 8192,
system: `You are Timmy, an AI agent gatekeeper. Evaluate whether a request is acceptable to act on.
@@ -45,10 +109,10 @@ Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted"
let parsed: { accepted: boolean; reason: string };
try {
const raw = block.text.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
const raw = block.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
parsed = JSON.parse(raw) as { accepted: boolean; reason: string };
} catch {
throw new Error(`Failed to parse eval JSON: ${block.text}`);
throw new Error(`Failed to parse eval JSON: ${block.text!}`);
}
return {
@@ -60,7 +124,13 @@ Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted"
}
async executeWork(requestText: string): Promise<WorkResult> {
const message = await anthropic.messages.create({
if (STUB_MODE) {
await new Promise((r) => setTimeout(r, 500));
return { result: STUB_RESULT, inputTokens: 0, outputTokens: 0 };
}
const client = await getClient();
const message = await client.messages.create({
model: this.workModel,
max_tokens: 8192,
system: `You are Timmy, a capable AI agent. A user has paid for you to handle their request.
@@ -74,11 +144,61 @@ Fulfill it thoroughly and helpfully. Be concise yet complete.`,
}
return {
result: block.text,
result: block.text!,
inputTokens: message.usage.input_tokens,
outputTokens: message.usage.output_tokens,
};
}
/**
 * Streaming variant of executeWork (#3). Invokes onChunk once per text delta,
 * in arrival order. In stub mode, the canned response is emitted word-by-word
 * so the SSE path is exercised end-to-end without a real Anthropic key.
 *
 * @param requestText - user request forwarded verbatim to the model
 * @param onChunk - callback receiving each text delta as it arrives
 * @returns the full concatenated result plus input/output token usage
 */
async executeWorkStreaming(
  requestText: string,
  onChunk: (delta: string) => void,
): Promise<WorkResult> {
  if (STUB_MODE) {
    // Drip the stub response out word-by-word to mimic real streaming pacing.
    for (const word of STUB_RESULT.split(" ")) {
      onChunk(word + " ");
      await new Promise((r) => setTimeout(r, 40));
    }
    return { result: STUB_RESULT, inputTokens: 0, outputTokens: 0 };
  }
  const client = await getClient();
  let fullText = "";
  let inputTokens = 0;
  let outputTokens = 0;
  const stream = client.messages.stream({
    model: this.workModel,
    max_tokens: 8192,
    system: `You are Timmy, a capable AI agent. A user has paid for you to handle their request.
Fulfill it thoroughly and helpfully. Be concise yet complete.`,
    messages: [{ role: "user", content: requestText }],
  });
  for await (const event of stream) {
    if (
      event.type === "content_block_delta" &&
      event.delta?.type === "text_delta"
    ) {
      // The check above narrows event.delta to defined — no `!` needed.
      const delta = event.delta.text ?? "";
      fullText += delta;
      onChunk(delta);
    } else if (event.type === "message_delta" && event.usage) {
      outputTokens = event.usage.output_tokens;
    } else if (event.type === "message_start" && event.message?.usage) {
      inputTokens = event.message.usage.input_tokens;
    }
  }
  return { result: fullText, inputTokens, outputTokens };
}
}
export const agentService = new AgentService();

View File

@@ -1,3 +1,7 @@
import { makeLogger } from "./logger.js";
const logger = makeLogger("btc-oracle");
const COINGECKO_URL =
"https://api.coingecko.com/api/v3/simple/price?ids=bitcoin&vs_currencies=usd";
@@ -42,7 +46,10 @@ export async function getBtcPriceUsd(): Promise<number> {
return price;
} catch (err) {
const fb = fallbackPrice();
console.warn(`[btc-oracle] Price fetch failed (using $${fb} fallback):`, err);
logger.warn("price fetch failed using fallback", {
fallback_usd: fb,
error: err instanceof Error ? err.message : String(err),
});
return fb;
}
}

View File

@@ -0,0 +1,34 @@
import { EventEmitter } from "events";
/** Job lifecycle events published while a job moves through its states. */
export type JobEvent =
  | { type: "job:state"; jobId: string; state: string }
  | { type: "job:paid"; jobId: string; invoiceType: "eval" | "work" }
  | { type: "job:completed"; jobId: string; result: string }
  | { type: "job:failed"; jobId: string; reason: string };

/** Chat-session lifecycle and balance events. */
export type SessionEvent =
  | { type: "session:state"; sessionId: string; state: string }
  | { type: "session:paid"; sessionId: string; amountSats: number }
  | { type: "session:balance"; sessionId: string; balanceSats: number };

/** Union of everything that can travel over the bus. */
export type BusEvent = JobEvent | SessionEvent;

/**
 * Typed facade over Node's EventEmitter. All traffic flows through a single
 * "bus" channel; the overload signatures give publishers and subscribers a
 * typed BusEvent payload while delegating to the untyped base class.
 */
class EventBus extends EventEmitter {
  /** Push one event to every current subscriber. */
  publish(event: BusEvent): void {
    this.emit("bus", event);
  }

  emit(event: "bus", data: BusEvent): boolean;
  emit(event: string, ...rest: unknown[]): boolean {
    return super.emit(event, ...rest);
  }

  on(event: "bus", listener: (data: BusEvent) => void): this;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  on(event: string, handler: (...params: any[]) => void): this {
    return super.on(event, handler);
  }
}

/** Singleton bus shared across the server process. */
export const eventBus = new EventBus();
// SSE fan-out can attach many listeners at once; raise the default cap of 10.
eventBus.setMaxListeners(256);

View File

@@ -0,0 +1,45 @@
// Per-route retention cap: oldest samples roll off once a bucket is full.
const MAX_SAMPLES = 1_000;

/** Percentile summary for a single route/phase bucket. */
export interface BucketStats {
  p50: number | null;
  p95: number | null;
  count: number;
}

/**
 * In-memory sliding-window latency tracker. Each route key retains at most
 * MAX_SAMPLES of its most recent duration samples (milliseconds).
 */
export class LatencyHistogram {
  private readonly buckets = new Map<string, number[]>();

  /** Append one duration sample, evicting the oldest once the cap is hit. */
  record(route: string, durationMs: number): void {
    const samples = this.buckets.get(route) ?? [];
    if (!this.buckets.has(route)) {
      this.buckets.set(route, samples);
    }
    if (samples.length >= MAX_SAMPLES) {
      samples.shift();
    }
    samples.push(durationMs);
  }

  /**
   * Percentile (0–100) over a route's samples using a floor-rank index,
   * or null when the route has no data.
   */
  percentile(route: string, pct: number): number | null {
    const samples = this.buckets.get(route);
    if (samples === undefined || samples.length === 0) return null;
    const ordered = [...samples].sort((x, y) => x - y);
    const rank = Math.min(Math.floor((pct / 100) * ordered.length), ordered.length - 1);
    return ordered[rank] ?? null;
  }

  /** p50/p95/count for every tracked route key. */
  snapshot(): Record<string, BucketStats> {
    const result: Record<string, BucketStats> = {};
    for (const [route, samples] of this.buckets.entries()) {
      result[route] = {
        p50: this.percentile(route, 50),
        p95: this.percentile(route, 95),
        count: samples.length,
      };
    }
    return result;
  }
}

/** Shared process-wide histogram instance. */
export const latencyHistogram = new LatencyHistogram();

View File

@@ -1,4 +1,7 @@
import { randomBytes } from "crypto";
import { makeLogger } from "./logger.js";
const logger = makeLogger("lnbits");
export interface LNbitsInvoice {
paymentHash: string;
@@ -22,7 +25,7 @@ export class LNbitsService {
this.apiKey = config?.apiKey ?? process.env.LNBITS_API_KEY ?? "";
this.stubMode = !this.url || !this.apiKey;
if (this.stubMode) {
console.warn("[LNbitsService] No LNBITS_URL/LNBITS_API_KEY — running in STUB mode. Invoices are simulated.");
logger.warn("no LNBITS_URL/LNBITS_API_KEY — running in STUB mode", { stub: true });
}
}
@@ -32,7 +35,7 @@ export class LNbitsService {
if (this.stubMode) {
const paymentHash = randomBytes(32).toString("hex");
const paymentRequest = `lnbcrt${amountSats}u1stub_${paymentHash.slice(0, 16)}`;
console.log(`[stub] Created invoice: ${amountSats} sats — "${memo}" — hash=${paymentHash}`);
logger.info("stub invoice created", { amountSats, memo, paymentHash });
return { paymentHash, paymentRequest };
}
@@ -113,7 +116,7 @@ export class LNbitsService {
async payInvoice(bolt11: string): Promise<string> {
if (this.stubMode) {
const paymentHash = randomBytes(32).toString("hex");
console.log(`[stub] Paid outgoing invoice — fake hash=${paymentHash}`);
logger.info("stub outgoing payment", { paymentHash, invoiceType: "outbound" });
return paymentHash;
}
@@ -140,7 +143,7 @@ export class LNbitsService {
throw new Error("stubMarkPaid called on a real LNbitsService instance");
}
stubPaidInvoices.add(paymentHash);
console.log(`[stub] Marked invoice paid: hash=${paymentHash}`);
logger.info("stub invoice marked paid", { paymentHash, invoiceType: "inbound" });
}
// ── Private helpers ──────────────────────────────────────────────────────

View File

@@ -0,0 +1,32 @@
/** Severity levels understood by the structured logger. */
export type LogLevel = "debug" | "info" | "warn" | "error";

/** Arbitrary structured fields merged into a log line (may override base keys). */
export interface LogContext {
  [key: string]: unknown;
}

/**
 * Serialize one structured log line as single-line JSON.
 * warn/error go to stderr; debug/info go to stdout.
 */
function emit(level: LogLevel, component: string, message: string, ctx?: LogContext): void {
  const payload: Record<string, unknown> = {
    timestamp: new Date().toISOString(),
    level,
    component,
    message,
    ...ctx,
  };
  const serialized = JSON.stringify(payload);
  // Resolve the sink at call time so test/host code can swap console methods.
  const sink = level === "warn" || level === "error" ? console.error : console.log;
  sink(serialized);
}

/** Build a component-scoped logger exposing one method per level. */
export function makeLogger(component: string) {
  const at = (level: LogLevel) => (message: string, ctx?: LogContext) =>
    emit(level, component, message, ctx);
  return {
    debug: at("debug"),
    info: at("info"),
    warn: at("warn"),
    error: at("error"),
  };
}

/** Default logger for top-level server concerns. */
export const rootLogger = makeLogger("server");

View File

@@ -0,0 +1,118 @@
import { db, jobs, invoices } from "@workspace/db";
import { sql } from "drizzle-orm";
import { latencyHistogram, type BucketStats } from "./histogram.js";
/** Job counts collapsed into coarse operational phases. */
export interface JobStateCounts {
  awaiting_eval: number; // awaiting_eval_payment + evaluating
  awaiting_work: number; // awaiting_work_payment + executing
  complete: number;
  rejected: number;
  failed: number;
}

/** Full payload produced by MetricsService.snapshot(). */
export interface MetricsSnapshot {
  uptime_s: number; // seconds since module load (presumably process start — confirm)
  jobs: {
    total: number;
    by_state: JobStateCounts;
  };
  invoices: {
    total: number;
    paid: number;
    conversion_rate: number | null; // paid / total; null until the first invoice exists
  };
  earnings: {
    total_sats: number; // sum of actual_amount_sats over all jobs
  };
  latency: {
    eval_phase: BucketStats | null; // AI evaluation phase timings
    work_phase: BucketStats | null; // AI work-execution phase timings
    routes: Record<string, BucketStats>; // per-HTTP-route timings
  };
}

// Captured at module evaluation; snapshot() reports uptime relative to this.
const START_TIME = Date.now();
/**
 * Read-side metrics aggregator: job counts by operational phase, invoice
 * conversion, total sats earned, and latency percentiles from the
 * in-process histogram.
 */
export class MetricsService {
  /**
   * Build a point-in-time snapshot. The three aggregate queries run in
   * parallel; latency buckets come from the shared LatencyHistogram.
   */
  async snapshot(): Promise<MetricsSnapshot> {
    const stateCountsQuery = db
      .select({
        state: jobs.state,
        count: sql<number>`cast(count(*) as int)`,
      })
      .from(jobs)
      .groupBy(jobs.state);
    const invoiceTotalsQuery = db
      .select({
        total: sql<number>`cast(count(*) as int)`,
        paid: sql<number>`cast(sum(case when paid then 1 else 0 end) as int)`,
      })
      .from(invoices);
    const earningsQuery = db
      .select({
        total_sats: sql<number>`cast(coalesce(sum(actual_amount_sats), 0) as int)`,
      })
      .from(jobs);
    const [jobsByState, invoiceCounts, earningsRow] = await Promise.all([
      stateCountsQuery,
      invoiceTotalsQuery,
      earningsQuery,
    ]);

    // Tally raw DB states; jobsTotal is the sum across every state.
    const rawCounts: Record<string, number> = {};
    let jobsTotal = 0;
    for (const row of jobsByState) {
      const n = Number(row.count);
      rawCounts[row.state] = (rawCounts[row.state] ?? 0) + n;
      jobsTotal += n;
    }

    // Collapse raw DB states into the coarse operational phases.
    const byState: JobStateCounts = {
      awaiting_eval: (rawCounts["awaiting_eval_payment"] ?? 0) + (rawCounts["evaluating"] ?? 0),
      awaiting_work: (rawCounts["awaiting_work_payment"] ?? 0) + (rawCounts["executing"] ?? 0),
      complete: rawCounts["complete"] ?? 0,
      rejected: rawCounts["rejected"] ?? 0,
      failed: rawCounts["failed"] ?? 0,
    };

    const invoiceRow = invoiceCounts[0] ?? { total: 0, paid: 0 };
    const invoicesTotal = Number(invoiceRow.total);
    const invoicesPaid = Number(invoiceRow.paid);

    // Separate the two AI phase buckets from per-route HTTP latency.
    const allBuckets = latencyHistogram.snapshot();
    const routes: Record<string, BucketStats> = {};
    for (const [key, stats] of Object.entries(allBuckets)) {
      if (key === "eval_phase" || key === "work_phase") continue;
      routes[key] = stats;
    }

    return {
      uptime_s: Math.floor((Date.now() - START_TIME) / 1000),
      jobs: {
        total: jobsTotal,
        by_state: byState,
      },
      invoices: {
        total: invoicesTotal,
        paid: invoicesPaid,
        // Fraction of invoices paid; null until the first invoice exists.
        conversion_rate: invoicesTotal > 0 ? invoicesPaid / invoicesTotal : null,
      },
      earnings: {
        total_sats: Number(earningsRow[0]?.total_sats ?? 0),
      },
      latency: {
        eval_phase: allBuckets["eval_phase"] ?? null,
        work_phase: allBuckets["work_phase"] ?? null,
        routes,
      },
    };
  }
}

/** Shared singleton. */
export const metricsService = new MetricsService();

View File

@@ -1,6 +1,9 @@
import { generateKeyPairSync } from "crypto";
import { db, bootstrapJobs } from "@workspace/db";
import { eq } from "drizzle-orm";
import { makeLogger } from "./logger.js";
const logger = makeLogger("provisioner");
const DO_API_BASE = "https://api.digitalocean.com/v2";
const TS_API_BASE = "https://api.tailscale.com/api/v2";
@@ -458,9 +461,7 @@ export class ProvisionerService {
this.tsTailnet = process.env.TAILSCALE_TAILNET ?? "";
this.stubMode = !this.doToken;
if (this.stubMode) {
console.warn(
"[ProvisionerService] No DO_API_TOKEN — running in STUB mode. Provisioning is simulated.",
);
logger.warn("no DO_API_TOKEN — running in STUB mode", { stub: true });
}
}
@@ -477,7 +478,7 @@ export class ProvisionerService {
}
} catch (err) {
const message = err instanceof Error ? err.message : "Provisioning failed";
console.error(`[ProvisionerService] Error for job ${bootstrapJobId}:`, message);
logger.error("provisioning failed", { bootstrapJobId, error: message });
await db
.update(bootstrapJobs)
.set({ state: "failed", errorMessage: message, updatedAt: new Date() })
@@ -486,7 +487,7 @@ export class ProvisionerService {
}
private async stubProvision(jobId: string): Promise<void> {
console.log(`[stub] Simulating provisioning for bootstrap job ${jobId}...`);
logger.info("stub provisioning started", { bootstrapJobId: jobId });
const { privateKey } = generateSshKeypair();
await new Promise((r) => setTimeout(r, 2000));
const fakeDropletId = String(Math.floor(Math.random() * 900_000_000 + 100_000_000));
@@ -502,11 +503,11 @@ export class ProvisionerService {
updatedAt: new Date(),
})
.where(eq(bootstrapJobs.id, jobId));
console.log(`[stub] Bootstrap job ${jobId} marked ready with fake credentials.`);
logger.info("stub provisioning complete", { bootstrapJobId: jobId });
}
private async realProvision(jobId: string): Promise<void> {
console.log(`[ProvisionerService] Provisioning real node for job ${jobId}...`);
logger.info("real provisioning started", { bootstrapJobId: jobId });
// 1. SSH keypair (pure node:crypto)
const { publicKey, privateKey } = generateSshKeypair();
@@ -525,7 +526,7 @@ export class ProvisionerService {
try {
tailscaleAuthKey = await getTailscaleAuthKey(this.tsApiKey, this.tsTailnet);
} catch (err) {
console.warn("[ProvisionerService] Tailscale key failed — continuing without:", err);
logger.warn("Tailscale key failed — continuing without Tailscale", { error: String(err) });
}
}
@@ -534,7 +535,7 @@ export class ProvisionerService {
if (this.doVolumeGb > 0) {
const volName = `timmy-data-${jobId.slice(0, 8)}`;
volumeId = await createVolume(volName, this.doVolumeGb, this.doRegion, this.doToken);
console.log(`[ProvisionerService] Volume created: id=${volumeId} (${this.doVolumeGb} GB)`);
logger.info("block volume created", { volumeId, sizeGb: this.doVolumeGb });
}
// 5. Create droplet
@@ -556,11 +557,11 @@ export class ProvisionerService {
dropletPayload,
);
const dropletId = dropletData.droplet.id;
console.log(`[ProvisionerService] Droplet created: id=${dropletId}`);
logger.info("droplet created", { bootstrapJobId: jobId, dropletId });
// 6. Poll for public IP (up to 2 min)
const nodeIp = await pollDropletIp(dropletId, this.doToken, 120_000);
console.log(`[ProvisionerService] Node IP: ${nodeIp ?? "(not yet assigned)"}`);
logger.info("node ip assigned", { bootstrapJobId: jobId, nodeIp: nodeIp ?? "(not yet assigned)" });
// 7. Tailscale hostname
const tailscaleHostname =
@@ -589,7 +590,7 @@ export class ProvisionerService {
})
.where(eq(bootstrapJobs.id, jobId));
console.log(`[ProvisionerService] Bootstrap job ${jobId} ready.`);
logger.info("real provisioning complete", { bootstrapJobId: jobId });
}
}

View File

@@ -0,0 +1,46 @@
import { rateLimit, type Options } from "express-rate-limit";
import { makeLogger } from "./logger.js";
const logger = makeLogger("rate-limiter");
/**
 * Read an integer from the environment, falling back when the variable is
 * unset, empty, or not parseable as a base-10 integer.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key];
  if (!raw) return fallback;
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}
/**
 * Build an express-rate-limit middleware for one window/cap pair. The cap can
 * be overridden at deploy time via the named environment variable. 429s are
 * logged with the caller's IP and answered with a structured JSON body.
 */
function limiter(windowMs: number, max: number, overrideKey?: string) {
  const effectiveMax = overrideKey ? envInt(overrideKey, max) : max;
  const retryAfterSeconds = Math.ceil(windowMs / 1000);
  return rateLimit({
    windowMs,
    max: effectiveMax,
    standardHeaders: "draft-7",
    legacyHeaders: false,
    handler: (req, res) => {
      // Prefer the first hop of X-Forwarded-For; fall back to the socket.
      const forwarded = (req.headers["x-forwarded-for"] as string | undefined)
        ?.split(",")[0]
        ?.trim();
      const ip = forwarded ?? req.socket.remoteAddress ?? "unknown";
      logger.warn("rate limit hit", {
        route: req.path,
        method: req.method,
        ip,
        retry_after_s: retryAfterSeconds,
      });
      res.status(429).json({
        error: "rate_limited",
        message: "Too many requests — please slow down.",
        retryAfterSeconds,
      });
    },
  } satisfies Partial<Options>);
}

// POST /api/jobs — 30 req/min per IP (configurable via RATE_LIMIT_JOBS)
export const jobsLimiter = limiter(60_000, 30, "RATE_LIMIT_JOBS");
// POST /api/sessions — 10 req/min per IP (configurable via RATE_LIMIT_SESSIONS)
export const sessionsLimiter = limiter(60_000, 10, "RATE_LIMIT_SESSIONS");
// POST /api/bootstrap — 3 req/hour per IP (configurable via RATE_LIMIT_BOOTSTRAP)
export const bootstrapLimiter = limiter(60 * 60_000, 3, "RATE_LIMIT_BOOTSTRAP");

View File

@@ -0,0 +1,55 @@
import { PassThrough } from "stream";
/** One live job stream plus its creation time (for TTL eviction). */
interface StreamEntry {
  stream: PassThrough;
  createdAt: number;
}

/**
 * Registry of per-job PassThrough streams used to fan AI token deltas out to
 * SSE clients. Entries self-clean on stream close and are force-evicted after
 * TTL_MS so abandoned jobs don't leak streams.
 */
class StreamRegistry {
  private readonly streams = new Map<string, StreamEntry>();
  // Entries older than this are destroyed on the next register() call.
  private readonly TTL_MS = 5 * 60 * 1000;

  /**
   * Create (or replace) the stream slot for a job and return the new stream.
   *
   * Bug fix: destroy() emits "close" asynchronously, and the old close handler
   * deleted by jobId unconditionally — so re-registering a job caused the
   * destroyed predecessor to wipe out the NEW entry one tick later. The close
   * handler now removes the entry only while this stream still owns the slot.
   */
  register(jobId: string): PassThrough {
    const existing = this.streams.get(jobId);
    if (existing) {
      this.streams.delete(jobId);
      existing.stream.destroy();
    }
    const stream = new PassThrough();
    this.streams.set(jobId, { stream, createdAt: Date.now() });
    stream.on("close", () => {
      // Only clear the slot if it is still occupied by this stream.
      if (this.streams.get(jobId)?.stream === stream) {
        this.streams.delete(jobId);
      }
    });
    this.evictExpired();
    return stream;
  }

  /** Stream for a job, or null if none is registered. */
  get(jobId: string): PassThrough | null {
    return this.streams.get(jobId)?.stream ?? null;
  }

  /** Write a token delta to a job's stream; no-op if unregistered. */
  write(jobId: string, chunk: string): void {
    this.streams.get(jobId)?.stream.write(chunk);
  }

  /** Gracefully end and unregister a job's stream. */
  end(jobId: string): void {
    const entry = this.streams.get(jobId);
    if (entry) {
      entry.stream.end();
      this.streams.delete(jobId);
    }
  }

  /** Destroy and drop any entry older than TTL_MS. */
  private evictExpired(): void {
    const now = Date.now();
    for (const [id, entry] of this.streams.entries()) {
      if (now - entry.createdAt > this.TTL_MS) {
        this.streams.delete(id);
        entry.stream.destroy();
      }
    }
  }
}

/** Process-wide singleton registry. */
export const streamRegistry = new StreamRegistry();

View File

@@ -0,0 +1,31 @@
import type { Request, Response, NextFunction } from "express";
import { makeLogger } from "../lib/logger.js";
import { latencyHistogram } from "../lib/histogram.js";
const logger = makeLogger("http");
/**
 * Express middleware: records per-route latency into the shared histogram and
 * emits one structured access-log line when the response finishes. The
 * histogram key prefers the matched route pattern over the raw path so all
 * hits on e.g. "/jobs/:id" share one bucket.
 */
export function responseTimeMiddleware(req: Request, res: Response, next: NextFunction): void {
  const start = Date.now();
  res.on("finish", () => {
    const elapsed = Date.now() - start;
    const matchedRoute = req.route?.path as string | undefined;
    latencyHistogram.record(`${req.method} ${matchedRoute ?? req.path}`, elapsed);
    // Prefer the first hop of X-Forwarded-For; fall back to the socket.
    const forwarded = (req.headers["x-forwarded-for"] as string | undefined)
      ?.split(",")[0]
      ?.trim();
    logger.info("request", {
      method: req.method,
      path: req.path,
      route: matchedRoute ?? null,
      status: res.statusCode,
      duration_ms: elapsed,
      ip: forwarded ?? req.socket.remoteAddress ?? null,
    });
  });
  next();
}

View File

@@ -5,6 +5,9 @@ import { eq, and } from "drizzle-orm";
import { lnbitsService } from "../lib/lnbits.js";
import { pricingService } from "../lib/pricing.js";
import { provisionerService } from "../lib/provisioner.js";
import { makeLogger } from "../lib/logger.js";
const logger = makeLogger("bootstrap");
const router = Router();
@@ -44,7 +47,7 @@ async function advanceBootstrapJob(job: BootstrapJob): Promise<BootstrapJob | nu
return getBootstrapJobById(job.id);
}
console.log(`[bootstrap] Payment confirmed for ${job.id} — starting provisioning`);
logger.info("bootstrap payment confirmed — starting provisioning", { bootstrapJobId: job.id });
// Fire-and-forget: provisioner updates DB when done
void provisionerService.provision(job.id);

View File

@@ -1,8 +1,10 @@
import { Router, type Request, type Response } from "express";
import { RunDemoQueryParams } from "@workspace/api-zod";
import { agentService } from "../lib/agent.js";
import { makeLogger } from "../lib/logger.js";
const router = Router();
const logger = makeLogger("demo");
const RATE_LIMIT_MAX = 5;
const RATE_LIMIT_WINDOW_MS = 60 * 60 * 1000;
@@ -35,6 +37,7 @@ router.get("/demo", async (req: Request, res: Response) => {
const { allowed, resetAt } = checkRateLimit(ip);
if (!allowed) {
const secsUntilReset = Math.ceil((resetAt - Date.now()) / 1000);
logger.warn("demo rate limited", { ip, retry_after_s: secsUntilReset });
res.status(429).json({
error: `Rate limit exceeded. Try again in ${secsUntilReset}s (5 requests per hour per IP).`,
});
@@ -52,11 +55,14 @@ router.get("/demo", async (req: Request, res: Response) => {
}
const { request } = parseResult.data;
logger.info("demo request received", { ip });
try {
const { result } = await agentService.executeWork(request);
res.json({ result });
} catch (err) {
const message = err instanceof Error ? err.message : "Agent error";
logger.error("demo agent error", { ip, error: message });
res.status(500).json({ error: message });
}
});

View File

@@ -1,11 +1,25 @@
import { Router, type IRouter } from "express";
import { HealthCheckResponse } from "@workspace/api-zod";
import { Router, type IRouter, type Request, type Response } from "express";
import { db, jobs } from "@workspace/db";
import { sql } from "drizzle-orm";
import { makeLogger } from "../lib/logger.js";
const router: IRouter = Router();
const logger = makeLogger("health");
router.get("/healthz", (_req, res) => {
const data = HealthCheckResponse.parse({ status: "ok" });
res.json(data);
const START_TIME = Date.now();
router.get("/healthz", async (_req: Request, res: Response) => {
try {
const rows = await db.select({ total: sql<number>`cast(count(*) as int)` }).from(jobs);
const jobsTotal = Number(rows[0]?.total ?? 0);
const uptimeS = Math.floor((Date.now() - START_TIME) / 1000);
res.json({ status: "ok", uptime_s: uptimeS, jobs_total: jobsTotal });
} catch (err) {
const message = err instanceof Error ? err.message : "Health check failed";
logger.error("healthz db query failed", { error: message });
const uptimeS = Math.floor((Date.now() - START_TIME) / 1000);
res.status(503).json({ status: "error", uptime_s: uptimeS, error: message });
}
});
export default router;

View File

@@ -8,10 +8,12 @@ import devRouter from "./dev.js";
import testkitRouter from "./testkit.js";
import uiRouter from "./ui.js";
import nodeDiagnosticsRouter from "./node-diagnostics.js";
import metricsRouter from "./metrics.js";
const router: IRouter = Router();
router.use(healthRouter);
router.use(metricsRouter);
router.use(jobsRouter);
router.use(bootstrapRouter);
router.use(sessionsRouter);

View File

@@ -6,6 +6,13 @@ import { CreateJobBody, GetJobParams } from "@workspace/api-zod";
import { lnbitsService } from "../lib/lnbits.js";
import { agentService } from "../lib/agent.js";
import { pricingService } from "../lib/pricing.js";
import { jobsLimiter } from "../lib/rate-limiter.js";
import { eventBus } from "../lib/event-bus.js";
import { streamRegistry } from "../lib/stream-registry.js";
import { makeLogger } from "../lib/logger.js";
import { latencyHistogram } from "../lib/histogram.js";
const logger = makeLogger("jobs");
const router = Router();
@@ -24,8 +31,18 @@ async function getInvoiceById(id: string) {
* return immediately with "evaluating" state instead of blocking 5-8 seconds.
*/
async function runEvalInBackground(jobId: string, request: string): Promise<void> {
const evalStart = Date.now();
try {
const evalResult = await agentService.evaluateRequest(request);
latencyHistogram.record("eval_phase", Date.now() - evalStart);
logger.info("eval result", {
jobId,
accepted: evalResult.accepted,
reason: evalResult.reason,
inputTokens: evalResult.inputTokens,
outputTokens: evalResult.outputTokens,
});
if (evalResult.accepted) {
const inputEst = pricingService.estimateInputTokens(request);
@@ -65,11 +82,13 @@ async function runEvalInBackground(jobId: string, request: string): Promise<void
})
.where(eq(jobs.id, jobId));
});
eventBus.publish({ type: "job:state", jobId, state: "awaiting_work_payment" });
} else {
await db
.update(jobs)
.set({ state: "rejected", rejectionReason: evalResult.reason, updatedAt: new Date() })
.where(eq(jobs.id, jobId));
eventBus.publish({ type: "job:state", jobId, state: "rejected" });
}
} catch (err) {
const message = err instanceof Error ? err.message : "Evaluation error";
@@ -77,15 +96,25 @@ async function runEvalInBackground(jobId: string, request: string): Promise<void
.update(jobs)
.set({ state: "failed", errorMessage: message, updatedAt: new Date() })
.where(eq(jobs.id, jobId));
eventBus.publish({ type: "job:failed", jobId, reason: message });
}
}
/**
* Runs the AI work execution in a background task so HTTP polls return fast.
* Uses streaming so any connected SSE client receives tokens in real time (#3).
*/
async function runWorkInBackground(jobId: string, request: string, workAmountSats: number, btcPriceUsd: number | null): Promise<void> {
const workStart = Date.now();
try {
const workResult = await agentService.executeWork(request);
eventBus.publish({ type: "job:state", jobId, state: "executing" });
const workResult = await agentService.executeWorkStreaming(request, (delta) => {
streamRegistry.write(jobId, delta);
});
streamRegistry.end(jobId);
latencyHistogram.record("work_phase", Date.now() - workStart);
const actualCostUsd = pricingService.calculateActualCostUsd(
workResult.inputTokens,
@@ -112,12 +141,24 @@ async function runWorkInBackground(jobId: string, request: string, workAmountSat
updatedAt: new Date(),
})
.where(eq(jobs.id, jobId));
logger.info("work completed", {
jobId,
inputTokens: workResult.inputTokens,
outputTokens: workResult.outputTokens,
actualAmountSats,
refundAmountSats,
refundState,
});
eventBus.publish({ type: "job:completed", jobId, result: workResult.result });
} catch (err) {
const message = err instanceof Error ? err.message : "Execution error";
streamRegistry.end(jobId);
await db
.update(jobs)
.set({ state: "failed", errorMessage: message, updatedAt: new Date() })
.where(eq(jobs.id, jobId));
eventBus.publish({ type: "job:failed", jobId, reason: message });
}
}
@@ -149,6 +190,10 @@ async function advanceJob(job: Job): Promise<Job | null> {
if (!advanced) return getJobById(job.id);
logger.info("invoice paid", { jobId: job.id, invoiceType: "eval", paymentHash: evalInvoice.paymentHash });
eventBus.publish({ type: "job:paid", jobId: job.id, invoiceType: "eval" });
eventBus.publish({ type: "job:state", jobId: job.id, state: "evaluating" });
// Fire AI eval in background — poll returns immediately with "evaluating"
setImmediate(() => { void runEvalInBackground(job.id, job.request); });
@@ -177,6 +222,12 @@ async function advanceJob(job: Job): Promise<Job | null> {
if (!advanced) return getJobById(job.id);
logger.info("invoice paid", { jobId: job.id, invoiceType: "work", paymentHash: workInvoice.paymentHash });
eventBus.publish({ type: "job:paid", jobId: job.id, invoiceType: "work" });
// Register stream slot before firing background work so first tokens aren't lost
streamRegistry.register(job.id);
// Fire AI work in background — poll returns immediately with "executing"
setImmediate(() => { void runWorkInBackground(job.id, job.request, job.workAmountSats ?? 0, job.btcPriceUsd); });
@@ -188,7 +239,7 @@ async function advanceJob(job: Job): Promise<Job | null> {
// ── POST /jobs ────────────────────────────────────────────────────────────────
router.post("/jobs", async (req: Request, res: Response) => {
router.post("/jobs", jobsLimiter, async (req: Request, res: Response) => {
const parseResult = CreateJobBody.safeParse(req.body);
if (!parseResult.success) {
const issue = parseResult.error.issues[0];
@@ -221,6 +272,8 @@ router.post("/jobs", async (req: Request, res: Response) => {
await tx.update(jobs).set({ evalInvoiceId: invoiceId, updatedAt: new Date() }).where(eq(jobs.id, jobId));
});
logger.info("job created", { jobId, evalAmountSats: evalFee, stubMode: lnbitsService.stubMode });
res.status(201).json({
jobId,
evalInvoice: {
@@ -231,6 +284,7 @@ router.post("/jobs", async (req: Request, res: Response) => {
});
} catch (err) {
const message = err instanceof Error ? err.message : "Failed to create job";
logger.error("job creation failed", { error: message });
res.status(500).json({ error: message });
}
});
@@ -404,4 +458,130 @@ router.post("/jobs/:id/refund", async (req: Request, res: Response) => {
}
});
// ── GET /jobs/:id/stream ──────────────────────────────────────────────────────
// Server-Sent Events (#3): streams Claude token deltas in real time while the
// job is executing. If the job is already complete, sends the full result then
// closes. If the job isn't executing yet, waits up to 60 s for it to start.
router.get("/jobs/:id/stream", async (req: Request, res: Response) => {
  const paramResult = GetJobParams.safeParse(req.params);
  if (!paramResult.success) {
    res.status(400).json({ error: "Invalid job id" });
    return;
  }
  const { id } = paramResult.data;

  const job = await getJobById(id);
  if (!job) {
    res.status(404).json({ error: "Job not found" });
    return;
  }

  // Standard SSE response headers; X-Accel-Buffering disables proxy buffering
  // (nginx) so tokens reach the client immediately.
  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.setHeader("X-Accel-Buffering", "no");
  res.flushHeaders();

  // Emit one SSE frame: named event + JSON payload.
  const sendEvent = (event: string, data: unknown) => {
    res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
  };

  // Job already complete — replay full result immediately
  if (job.state === "complete" && job.result) {
    sendEvent("token", { text: job.result });
    sendEvent("done", { jobId: id, state: "complete" });
    res.end();
    return;
  }
  if (job.state === "failed") {
    sendEvent("error", { jobId: id, message: job.errorMessage ?? "Job failed" });
    res.end();
    return;
  }

  // Job is executing or about to execute — pipe the live stream.
  // Heartbeat comments keep intermediaries from closing the idle connection.
  const sendHeartbeat = setInterval(() => {
    res.write(": heartbeat\n\n");
  }, 15_000);

  const cleanup = () => {
    clearInterval(sendHeartbeat);
  };
  req.on("close", cleanup);

  // ── Wait for stream slot (fixes #16 race condition) ──────────────────────
  // After the bus wait we re-check BOTH the stream registry AND the DB so we
  // handle: (a) job completed while we waited (stream already gone), (b) job
  // still executing but stream was registered after we first checked.
  let stream = streamRegistry.get(id);
  let currentJob = job;
  if (!stream) {
    await new Promise<void>((resolve) => {
      const busListener = (data: Parameters<typeof eventBus.publish>[0]) => {
        if ("jobId" in data && data.jobId === id) {
          clearTimeout(deadline);
          eventBus.off("bus", busListener);
          resolve();
        }
      };
      // 90 s timeout — generous enough for slow payment confirmations on
      // mainnet. FIX: the timeout path must also detach busListener; the
      // previous version only detached it on a bus hit, so every timed-out
      // SSE request leaked a listener on the shared event bus.
      const deadline = setTimeout(() => {
        eventBus.off("bus", busListener);
        resolve();
      }, 90_000);
      eventBus.on("bus", busListener);
    });
    // Refresh both stream slot and job state after waiting
    stream = streamRegistry.get(id);
    currentJob = (await getJobById(id)) ?? currentJob;
  }

  // ── Resolve: stream available ─────────────────────────────────────────────
  if (stream) {
    // Parameter is narrowed to non-nullable, so no `!` assertions are needed.
    const attachToStream = (s: NonNullable<typeof stream>) => {
      s.on("data", (chunk: Buffer) => {
        sendEvent("token", { text: chunk.toString("utf8") });
      });
      s.on("end", () => {
        sendEvent("done", { jobId: id, state: "complete" });
        res.end();
        cleanup();
      });
      s.on("error", (err: Error) => {
        sendEvent("error", { jobId: id, message: err.message });
        res.end();
        cleanup();
      });
    };
    attachToStream(stream);
    return;
  }

  // ── Resolve: job completed while we waited (stream already gone) ──────────
  if (currentJob.state === "complete" && currentJob.result) {
    sendEvent("token", { text: currentJob.result });
    sendEvent("done", { jobId: id, state: "complete" });
    res.end();
    cleanup();
    return;
  }
  if (currentJob.state === "failed") {
    sendEvent("error", { jobId: id, message: currentJob.errorMessage ?? "Job failed" });
    res.end();
    cleanup();
    return;
  }

  // ── Resolve: timeout with no activity — tell client to fall back to polling
  sendEvent("error", {
    jobId: id,
    message: "Stream timed out. Poll GET /api/jobs/:id for current state.",
  });
  res.end();
  cleanup();
});
export default router;

View File

@@ -0,0 +1,19 @@
import { Router, type Request, type Response } from "express";
import { metricsService } from "../lib/metrics.js";
import { makeLogger } from "../lib/logger.js";
const router = Router();
const logger = makeLogger("metrics");
/**
 * GET /metrics — serialize the current metrics snapshot as JSON.
 * On any collection failure: log the error message and answer 500
 * with a `{ error }` body.
 */
router.get("/metrics", async (_req: Request, res: Response) => {
  try {
    res.json(await metricsService.snapshot());
  } catch (err) {
    let message = "Failed to collect metrics";
    if (err instanceof Error) {
      message = err.message;
    }
    logger.error("snapshot failed", { error: message });
    res.status(500).json({ error: message });
  }
});
export default router;

View File

@@ -3,6 +3,8 @@ import { randomBytes, randomUUID } from "crypto";
import { db, sessions, sessionRequests, type Session } from "@workspace/db";
import { eq, and } from "drizzle-orm";
import { lnbitsService } from "../lib/lnbits.js";
import { sessionsLimiter } from "../lib/rate-limiter.js";
import { eventBus } from "../lib/event-bus.js";
import { agentService } from "../lib/agent.js";
import { pricingService } from "../lib/pricing.js";
import { getBtcPriceUsd, usdToSats } from "../lib/btc-oracle.js";
@@ -133,7 +135,7 @@ async function advanceTopup(session: Session): Promise<Session> {
// ── POST /sessions ─────────────────────────────────────────────────────────────
router.post("/sessions", async (req: Request, res: Response) => {
router.post("/sessions", sessionsLimiter, async (req: Request, res: Response) => {
const rawAmount = req.body?.amount_sats;
const amountSats = parseInt(String(rawAmount ?? ""), 10);
@@ -220,7 +222,7 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
}
try {
let session = await getSessionById(id);
const session = await getSessionById(id);
if (!session) { res.status(404).json({ error: "Session not found" }); return; }
// Auth

View File

@@ -9,6 +9,8 @@ const router = Router();
* BASE URL. Agents and testers can run the full test suite with one command:
*
* curl -s https://your-url.replit.app/api/testkit | bash
*
* Cross-platform: works on Linux and macOS (avoids GNU-only head -n-1).
*/
router.get("/testkit", (req: Request, res: Response) => {
const proto =
@@ -31,16 +33,17 @@ FAIL=0
SKIP=0
note() { echo " [\$1] \$2"; }
jq_field() { echo "\$1" | jq -r "\$2" 2>/dev/null || echo ""; }
sep() { echo; echo "=== $* ==="; }
sep() { echo; echo "=== $* ==="; }
# body_of: strip last line (HTTP status code) — works on GNU and BSD (macOS)
body_of() { echo "\$1" | sed '$d'; }
code_of() { echo "\$1" | tail -n1; }
# ---------------------------------------------------------------------------
# Test 1 — Health check
# ---------------------------------------------------------------------------
sep "Test 1 — Health check"
T1_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/healthz")
T1_BODY=$(echo "$T1_RES" | head -n-1)
T1_CODE=$(echo "$T1_RES" | tail -n1)
T1_BODY=$(body_of "$T1_RES"); T1_CODE=$(code_of "$T1_RES")
if [[ "$T1_CODE" == "200" ]] && [[ "$(echo "$T1_BODY" | jq -r '.status' 2>/dev/null)" == "ok" ]]; then
note PASS "HTTP 200, status=ok"
PASS=$((PASS+1))
@@ -56,8 +59,7 @@ sep "Test 2 — Create job"
T2_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
-H "Content-Type: application/json" \\
-d '{"request":"Explain the Lightning Network in two sentences"}')
T2_BODY=$(echo "$T2_RES" | head -n-1)
T2_CODE=$(echo "$T2_RES" | tail -n1)
T2_BODY=$(body_of "$T2_RES"); T2_CODE=$(code_of "$T2_RES")
JOB_ID=$(echo "$T2_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
EVAL_AMT=$(echo "$T2_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
if [[ "$T2_CODE" == "201" && -n "$JOB_ID" && "$EVAL_AMT" == "10" ]]; then
@@ -73,8 +75,7 @@ fi
# ---------------------------------------------------------------------------
sep "Test 3 — Poll before payment"
T3_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
T3_BODY=$(echo "$T3_RES" | head -n-1)
T3_CODE=$(echo "$T3_RES" | tail -n1)
T3_BODY=$(body_of "$T3_RES"); T3_CODE=$(code_of "$T3_RES")
STATE_T3=$(echo "$T3_BODY" | jq -r '.state' 2>/dev/null || echo "")
EVAL_AMT_ECHO=$(echo "$T3_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
EVAL_HASH=$(echo "$T3_BODY" | jq -r '.evalInvoice.paymentHash' 2>/dev/null || echo "")
@@ -99,8 +100,7 @@ fi
sep "Test 4 — Pay eval invoice (stub)"
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
T4_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$EVAL_HASH")
T4_BODY=$(echo "$T4_RES" | head -n-1)
T4_CODE=$(echo "$T4_RES" | tail -n1)
T4_BODY=$(body_of "$T4_RES"); T4_CODE=$(code_of "$T4_RES")
if [[ "$T4_CODE" == "200" ]] && [[ "$(echo "$T4_BODY" | jq -r '.ok' 2>/dev/null)" == "true" ]]; then
note PASS "Eval invoice marked paid"
PASS=$((PASS+1))
@@ -114,25 +114,32 @@ else
fi
# ---------------------------------------------------------------------------
# Test 5 — Poll after eval payment
# Test 5 — Poll after eval payment (with retry loop — real AI eval takes 25 s)
# ---------------------------------------------------------------------------
sep "Test 5 — Poll after eval (state advance)"
sleep 2
T5_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
T5_BODY=$(echo "$T5_RES" | head -n-1)
T5_CODE=$(echo "$T5_RES" | tail -n1)
STATE_T5=$(echo "$T5_BODY" | jq -r '.state' 2>/dev/null || echo "")
WORK_AMT=$(echo "$T5_BODY" | jq -r '.workInvoice.amountSats' 2>/dev/null || echo "")
WORK_HASH=$(echo "$T5_BODY" | jq -r '.workInvoice.paymentHash' 2>/dev/null || echo "")
START_T5=$(date +%s)
T5_TIMEOUT=30
STATE_T5=""; WORK_AMT=""; WORK_HASH=""; T5_BODY=""; T5_CODE=""
while :; do
T5_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
T5_BODY=$(body_of "$T5_RES"); T5_CODE=$(code_of "$T5_RES")
STATE_T5=$(echo "$T5_BODY" | jq -r '.state' 2>/dev/null || echo "")
WORK_AMT=$(echo "$T5_BODY" | jq -r '.workInvoice.amountSats' 2>/dev/null || echo "")
WORK_HASH=$(echo "$T5_BODY" | jq -r '.workInvoice.paymentHash' 2>/dev/null || echo "")
NOW_T5=$(date +%s); ELAPSED_T5=$((NOW_T5 - START_T5))
if [[ "$STATE_T5" == "awaiting_work_payment" || "$STATE_T5" == "rejected" ]]; then break; fi
if (( ELAPSED_T5 > T5_TIMEOUT )); then break; fi
sleep 2
done
if [[ "$T5_CODE" == "200" && "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_AMT" && "$WORK_AMT" != "null" ]]; then
note PASS "state=awaiting_work_payment, workInvoice.amountSats=$WORK_AMT"
note PASS "state=awaiting_work_payment in $ELAPSED_T5 s, workInvoice.amountSats=$WORK_AMT"
PASS=$((PASS+1))
elif [[ "$T5_CODE" == "200" && "$STATE_T5" == "rejected" ]]; then
note PASS "Request correctly rejected by agent after eval"
note PASS "Request correctly rejected by agent after eval (in $ELAPSED_T5 s)"
PASS=$((PASS+1))
WORK_HASH=""
else
note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY"
note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY (after $ELAPSED_T5 s)"
FAIL=$((FAIL+1))
fi
@@ -142,8 +149,7 @@ fi
sep "Test 6 — Pay work invoice + get result"
if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH" != "null" ]]; then
T6_PAY_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$WORK_HASH")
T6_PAY_BODY=$(echo "$T6_PAY_RES" | head -n-1)
T6_PAY_CODE=$(echo "$T6_PAY_RES" | tail -n1)
T6_PAY_BODY=$(body_of "$T6_PAY_RES"); T6_PAY_CODE=$(code_of "$T6_PAY_RES")
if [[ "$T6_PAY_CODE" != "200" ]] || [[ "$(echo "$T6_PAY_BODY" | jq -r '.ok' 2>/dev/null)" != "true" ]]; then
note FAIL "Work payment stub failed: code=$T6_PAY_CODE body=$T6_PAY_BODY"
FAIL=$((FAIL+1))
@@ -152,11 +158,10 @@ if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH"
TIMEOUT=30
while :; do
T6_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
T6_BODY=$(echo "$T6_RES" | head -n-1)
T6_BODY=$(body_of "$T6_RES")
STATE_T6=$(echo "$T6_BODY" | jq -r '.state' 2>/dev/null || echo "")
RESULT_T6=$(echo "$T6_BODY" | jq -r '.result' 2>/dev/null || echo "")
NOW_TS=$(date +%s)
ELAPSED=$((NOW_TS - START_TS))
NOW_TS=$(date +%s); ELAPSED=$((NOW_TS - START_TS))
if [[ "$STATE_T6" == "complete" && -n "$RESULT_T6" && "$RESULT_T6" != "null" ]]; then
note PASS "state=complete in $ELAPSED s"
echo " Result: \${RESULT_T6:0:200}..."
@@ -177,33 +182,13 @@ else
fi
# ---------------------------------------------------------------------------
# Test 7Demo endpoint
# ---------------------------------------------------------------------------
sep "Test 7 — Demo endpoint"
START_DEMO=$(date +%s)
T7_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi")
T7_BODY=$(echo "$T7_RES" | head -n-1)
T7_CODE=$(echo "$T7_RES" | tail -n1)
END_DEMO=$(date +%s)
ELAPSED_DEMO=$((END_DEMO - START_DEMO))
RESULT_T7=$(echo "$T7_BODY" | jq -r '.result' 2>/dev/null || echo "")
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
note PASS "HTTP 200, result in $ELAPSED_DEMO s"
echo " Result: \${RESULT_T7:0:200}..."
PASS=$((PASS+1))
else
note FAIL "code=$T7_CODE body=$T7_BODY"
FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 8 — Input validation (4 sub-cases)
# Test 8Input validation (run BEFORE test 7 to avoid rate-limit interference)
# ---------------------------------------------------------------------------
sep "Test 8 — Input validation"
T8A_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
-H "Content-Type: application/json" -d '{}')
T8A_BODY=$(echo "$T8A_RES" | head -n-1); T8A_CODE=$(echo "$T8A_RES" | tail -n1)
T8A_BODY=$(body_of "$T8A_RES"); T8A_CODE=$(code_of "$T8A_RES")
if [[ "$T8A_CODE" == "400" && -n "$(echo "$T8A_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
note PASS "8a: Missing request body → HTTP 400"
PASS=$((PASS+1))
@@ -213,7 +198,7 @@ else
fi
T8B_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/does-not-exist")
T8B_BODY=$(echo "$T8B_RES" | head -n-1); T8B_CODE=$(echo "$T8B_RES" | tail -n1)
T8B_BODY=$(body_of "$T8B_RES"); T8B_CODE=$(code_of "$T8B_RES")
if [[ "$T8B_CODE" == "404" && -n "$(echo "$T8B_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
note PASS "8b: Unknown job ID → HTTP 404"
PASS=$((PASS+1))
@@ -222,8 +207,9 @@ else
FAIL=$((FAIL+1))
fi
# 8c runs here — before tests 7 and 9 consume rate-limit quota
T8C_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo")
T8C_BODY=$(echo "$T8C_RES" | head -n-1); T8C_CODE=$(echo "$T8C_RES" | tail -n1)
T8C_BODY=$(body_of "$T8C_RES"); T8C_CODE=$(code_of "$T8C_RES")
if [[ "$T8C_CODE" == "400" && -n "$(echo "$T8C_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
note PASS "8c: Demo missing param → HTTP 400"
PASS=$((PASS+1))
@@ -236,7 +222,7 @@ LONG_STR=$(node -e "process.stdout.write('x'.repeat(501))" 2>/dev/null || python
T8D_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
-H "Content-Type: application/json" \\
-d "{\\"request\\":\\"$LONG_STR\\"}")
T8D_BODY=$(echo "$T8D_RES" | head -n-1); T8D_CODE=$(echo "$T8D_RES" | tail -n1)
T8D_BODY=$(body_of "$T8D_RES"); T8D_CODE=$(code_of "$T8D_RES")
T8D_ERR=$(echo "$T8D_BODY" | jq -r '.error' 2>/dev/null || echo "")
if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
note PASS "8d: 501-char request → HTTP 400 with character limit error"
@@ -247,13 +233,31 @@ else
fi
# ---------------------------------------------------------------------------
# Test 9 — Demo rate limiter
# Test 7 — Demo endpoint (after validation, before rate-limit exhaustion test)
# ---------------------------------------------------------------------------
sep "Test 7 — Demo endpoint"
START_DEMO=$(date +%s)
T7_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi")
T7_BODY=$(body_of "$T7_RES"); T7_CODE=$(code_of "$T7_RES")
END_DEMO=$(date +%s); ELAPSED_DEMO=$((END_DEMO - START_DEMO))
RESULT_T7=$(echo "$T7_BODY" | jq -r '.result' 2>/dev/null || echo "")
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
note PASS "HTTP 200, result in $ELAPSED_DEMO s"
echo " Result: \${RESULT_T7:0:200}..."
PASS=$((PASS+1))
else
note FAIL "code=$T7_CODE body=$T7_BODY"
FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 9 — Demo rate limiter (intentionally exhausts remaining quota)
# ---------------------------------------------------------------------------
sep "Test 9 — Demo rate limiter"
GOT_200=0; GOT_429=0
for i in $(seq 1 6); do
RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=ratelimitprobe+$i")
CODE=$(echo "$RES" | tail -n1)
CODE=$(code_of "$RES")
echo " Request $i: HTTP $CODE"
[[ "$CODE" == "200" ]] && GOT_200=$((GOT_200+1)) || true
[[ "$CODE" == "429" ]] && GOT_429=$((GOT_429+1)) || true
@@ -273,8 +277,7 @@ sep "Test 10 — Rejection path"
T10_CREATE=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
-H "Content-Type: application/json" \\
-d '{"request":"Help me do something harmful and illegal"}')
T10_BODY=$(echo "$T10_CREATE" | head -n-1)
T10_CODE=$(echo "$T10_CREATE" | tail -n1)
T10_BODY=$(body_of "$T10_CREATE"); T10_CODE=$(code_of "$T10_CREATE")
JOB10_ID=$(echo "$T10_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
if [[ "$T10_CODE" != "201" || -z "$JOB10_ID" ]]; then
note FAIL "Failed to create adversarial job: code=$T10_CODE body=$T10_BODY"
@@ -285,17 +288,23 @@ else
if [[ -n "$EVAL10_HASH" && "$EVAL10_HASH" != "null" ]]; then
curl -s -X POST "$BASE/api/dev/stub/pay/$EVAL10_HASH" >/dev/null
fi
sleep 3
T10_POLL=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB10_ID")
T10_POLL_BODY=$(echo "$T10_POLL" | head -n-1)
T10_POLL_CODE=$(echo "$T10_POLL" | tail -n1)
STATE_10=$(echo "$T10_POLL_BODY" | jq -r '.state' 2>/dev/null || echo "")
REASON_10=$(echo "$T10_POLL_BODY" | jq -r '.reason' 2>/dev/null || echo "")
START_T10=$(date +%s); T10_TIMEOUT=30
STATE_10=""; REASON_10=""; T10_POLL_BODY=""; T10_POLL_CODE=""
while :; do
T10_POLL=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB10_ID")
T10_POLL_BODY=$(body_of "$T10_POLL"); T10_POLL_CODE=$(code_of "$T10_POLL")
STATE_10=$(echo "$T10_POLL_BODY" | jq -r '.state' 2>/dev/null || echo "")
REASON_10=$(echo "$T10_POLL_BODY" | jq -r '.reason' 2>/dev/null || echo "")
NOW_T10=$(date +%s); ELAPSED_T10=$((NOW_T10 - START_T10))
if [[ "$STATE_10" == "rejected" || "$STATE_10" == "failed" ]]; then break; fi
if (( ELAPSED_T10 > T10_TIMEOUT )); then break; fi
sleep 2
done
if [[ "$T10_POLL_CODE" == "200" && "$STATE_10" == "rejected" && -n "$REASON_10" && "$REASON_10" != "null" ]]; then
note PASS "state=rejected, reason: \${REASON_10:0:120}"
note PASS "state=rejected in $ELAPSED_T10 s, reason: \${REASON_10:0:120}"
PASS=$((PASS+1))
else
note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY"
note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY (after $ELAPSED_T10 s)"
FAIL=$((FAIL+1))
fi
fi
@@ -307,8 +316,7 @@ sep "Test 11 — Session: create session (awaiting_payment)"
T11_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/sessions" \\
-H "Content-Type: application/json" \\
-d '{"amount_sats": 200}')
T11_BODY=$(echo "$T11_RES" | head -n-1)
T11_CODE=$(echo "$T11_RES" | tail -n1)
T11_BODY=$(body_of "$T11_RES"); T11_CODE=$(code_of "$T11_RES")
SESSION_ID=$(echo "$T11_BODY" | jq -r '.sessionId' 2>/dev/null || echo "")
T11_STATE=$(echo "$T11_BODY" | jq -r '.state' 2>/dev/null || echo "")
T11_AMT=$(echo "$T11_BODY" | jq -r '.invoice.amountSats' 2>/dev/null || echo "")
@@ -322,12 +330,11 @@ else
fi
# ---------------------------------------------------------------------------
# Test 12 — Session: poll before payment (stub hash present)
# Test 12 — Session: poll before payment
# ---------------------------------------------------------------------------
sep "Test 12 — Session: poll before payment"
T12_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/sessions/$SESSION_ID")
T12_BODY=$(echo "$T12_RES" | head -n-1)
T12_CODE=$(echo "$T12_RES" | tail -n1)
T12_BODY=$(body_of "$T12_RES"); T12_CODE=$(code_of "$T12_RES")
T12_STATE=$(echo "$T12_BODY" | jq -r '.state' 2>/dev/null || echo "")
if [[ -z "$DEPOSIT_HASH" || "$DEPOSIT_HASH" == "null" ]]; then
DEPOSIT_HASH=$(echo "$T12_BODY" | jq -r '.invoice.paymentHash' 2>/dev/null || echo "")
@@ -348,8 +355,7 @@ if [[ -n "$DEPOSIT_HASH" && "$DEPOSIT_HASH" != "null" ]]; then
curl -s -X POST "$BASE/api/dev/stub/pay/$DEPOSIT_HASH" >/dev/null
sleep 1
T13_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/sessions/$SESSION_ID")
T13_BODY=$(echo "$T13_RES" | head -n-1)
T13_CODE=$(echo "$T13_RES" | tail -n1)
T13_BODY=$(body_of "$T13_RES"); T13_CODE=$(code_of "$T13_RES")
T13_STATE=$(echo "$T13_BODY" | jq -r '.state' 2>/dev/null || echo "")
T13_BAL=$(echo "$T13_BODY" | jq -r '.balanceSats' 2>/dev/null || echo "")
SESSION_MACAROON=$(echo "$T13_BODY" | jq -r '.macaroon' 2>/dev/null || echo "")
@@ -375,15 +381,13 @@ if [[ -n "$SESSION_MACAROON" && "$SESSION_MACAROON" != "null" ]]; then
-H "Content-Type: application/json" \\
-H "Authorization: Bearer $SESSION_MACAROON" \\
-d '{"request":"What is Bitcoin in one sentence?"}')
T14_BODY=$(echo "$T14_RES" | head -n-1)
T14_CODE=$(echo "$T14_RES" | tail -n1)
T14_BODY=$(body_of "$T14_RES"); T14_CODE=$(code_of "$T14_RES")
T14_STATE=$(echo "$T14_BODY" | jq -r '.state' 2>/dev/null || echo "")
T14_DEBITED=$(echo "$T14_BODY" | jq -r '.debitedSats' 2>/dev/null || echo "")
T14_BAL=$(echo "$T14_BODY" | jq -r '.balanceRemaining' 2>/dev/null || echo "")
END_T14=$(date +%s)
ELAPSED_T14=$((END_T14 - START_T14))
END_T14=$(date +%s); ELAPSED_T14=$((END_T14 - START_T14))
if [[ "$T14_CODE" == "200" && ("$T14_STATE" == "complete" || "$T14_STATE" == "rejected") && -n "$T14_DEBITED" && "$T14_DEBITED" != "null" && -n "$T14_BAL" ]]; then
note PASS "state=$T14_STATE in ${ELAPSED_T14}s, debitedSats=$T14_DEBITED, balanceRemaining=$T14_BAL"
note PASS "state=$T14_STATE in \${ELAPSED_T14}s, debitedSats=$T14_DEBITED, balanceRemaining=$T14_BAL"
PASS=$((PASS+1))
else
note FAIL "code=$T14_CODE body=$T14_BODY"
@@ -402,7 +406,7 @@ if [[ -n "$SESSION_ID" ]]; then
T15_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/sessions/$SESSION_ID/request" \\
-H "Content-Type: application/json" \\
-d '{"request":"What is Bitcoin?"}')
T15_CODE=$(echo "$T15_RES" | tail -n1)
T15_CODE=$(code_of "$T15_RES")
if [[ "$T15_CODE" == "401" ]]; then
note PASS "HTTP 401 without macaroon"
PASS=$((PASS+1))
@@ -424,8 +428,7 @@ if [[ -n "$SESSION_MACAROON" && "$SESSION_MACAROON" != "null" ]]; then
-H "Content-Type: application/json" \\
-H "Authorization: Bearer $SESSION_MACAROON" \\
-d '{"amount_sats": 500}')
T16_BODY=$(echo "$T16_RES" | head -n-1)
T16_CODE=$(echo "$T16_RES" | tail -n1)
T16_BODY=$(body_of "$T16_RES"); T16_CODE=$(code_of "$T16_RES")
T16_PR=$(echo "$T16_BODY" | jq -r '.topup.paymentRequest' 2>/dev/null || echo "")
T16_AMT=$(echo "$T16_BODY" | jq -r '.topup.amountSats' 2>/dev/null || echo "")
if [[ "$T16_CODE" == "200" && -n "$T16_PR" && "$T16_PR" != "null" && "$T16_AMT" == "500" ]]; then