fix(testkit): macOS compat + fix test 8c ordering (#24)
This commit is contained in:
@@ -5,6 +5,9 @@ import { eq, and } from "drizzle-orm";
|
||||
import { lnbitsService } from "../lib/lnbits.js";
|
||||
import { pricingService } from "../lib/pricing.js";
|
||||
import { provisionerService } from "../lib/provisioner.js";
|
||||
import { makeLogger } from "../lib/logger.js";
|
||||
|
||||
const logger = makeLogger("bootstrap");
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -44,7 +47,7 @@ async function advanceBootstrapJob(job: BootstrapJob): Promise<BootstrapJob | nu
|
||||
return getBootstrapJobById(job.id);
|
||||
}
|
||||
|
||||
console.log(`[bootstrap] Payment confirmed for ${job.id} — starting provisioning`);
|
||||
logger.info("bootstrap payment confirmed — starting provisioning", { bootstrapJobId: job.id });
|
||||
|
||||
// Fire-and-forget: provisioner updates DB when done
|
||||
void provisionerService.provision(job.id);
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import { Router, type Request, type Response } from "express";
|
||||
import { RunDemoQueryParams } from "@workspace/api-zod";
|
||||
import { agentService } from "../lib/agent.js";
|
||||
import { makeLogger } from "../lib/logger.js";
|
||||
|
||||
const router = Router();
|
||||
const logger = makeLogger("demo");
|
||||
|
||||
const RATE_LIMIT_MAX = 5;
|
||||
const RATE_LIMIT_WINDOW_MS = 60 * 60 * 1000;
|
||||
@@ -35,6 +37,7 @@ router.get("/demo", async (req: Request, res: Response) => {
|
||||
const { allowed, resetAt } = checkRateLimit(ip);
|
||||
if (!allowed) {
|
||||
const secsUntilReset = Math.ceil((resetAt - Date.now()) / 1000);
|
||||
logger.warn("demo rate limited", { ip, retry_after_s: secsUntilReset });
|
||||
res.status(429).json({
|
||||
error: `Rate limit exceeded. Try again in ${secsUntilReset}s (5 requests per hour per IP).`,
|
||||
});
|
||||
@@ -52,11 +55,14 @@ router.get("/demo", async (req: Request, res: Response) => {
|
||||
}
|
||||
const { request } = parseResult.data;
|
||||
|
||||
logger.info("demo request received", { ip });
|
||||
|
||||
try {
|
||||
const { result } = await agentService.executeWork(request);
|
||||
res.json({ result });
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Agent error";
|
||||
logger.error("demo agent error", { ip, error: message });
|
||||
res.status(500).json({ error: message });
|
||||
}
|
||||
});
|
||||
|
||||
@@ -1,11 +1,25 @@
|
||||
import { Router, type IRouter } from "express";
|
||||
import { HealthCheckResponse } from "@workspace/api-zod";
|
||||
import { Router, type IRouter, type Request, type Response } from "express";
|
||||
import { db, jobs } from "@workspace/db";
|
||||
import { sql } from "drizzle-orm";
|
||||
import { makeLogger } from "../lib/logger.js";
|
||||
|
||||
const router: IRouter = Router();
|
||||
const logger = makeLogger("health");
|
||||
|
||||
router.get("/healthz", (_req, res) => {
|
||||
const data = HealthCheckResponse.parse({ status: "ok" });
|
||||
res.json(data);
|
||||
const START_TIME = Date.now();
|
||||
|
||||
// GET /healthz — reports process uptime plus the row count of the jobs table.
// Answers 200 with { status, uptime_s, jobs_total } when the count query
// succeeds, and 503 with { status, uptime_s, error } when it throws.
router.get("/healthz", async (_req: Request, res: Response) => {
  // Whole seconds elapsed since START_TIME was captured.
  const uptimeSeconds = (): number => Math.floor((Date.now() - START_TIME) / 1000);

  try {
    const [countRow] = await db.select({ total: sql<number>`cast(count(*) as int)` }).from(jobs);
    // Number(...) normalises the driver's value; a missing row counts as 0.
    const jobs_total = Number(countRow?.total ?? 0);
    res.json({ status: "ok", uptime_s: uptimeSeconds(), jobs_total });
  } catch (failure) {
    let reason = "Health check failed";
    if (failure instanceof Error) {
      reason = failure.message;
    }
    logger.error("healthz db query failed", { error: reason });
    res.status(503).json({ status: "error", uptime_s: uptimeSeconds(), error: reason });
  }
});
|
||||
|
||||
export default router;
|
||||
|
||||
@@ -8,10 +8,12 @@ import devRouter from "./dev.js";
|
||||
import testkitRouter from "./testkit.js";
|
||||
import uiRouter from "./ui.js";
|
||||
import nodeDiagnosticsRouter from "./node-diagnostics.js";
|
||||
import metricsRouter from "./metrics.js";
|
||||
|
||||
const router: IRouter = Router();
|
||||
|
||||
router.use(healthRouter);
|
||||
router.use(metricsRouter);
|
||||
router.use(jobsRouter);
|
||||
router.use(bootstrapRouter);
|
||||
router.use(sessionsRouter);
|
||||
|
||||
@@ -6,6 +6,13 @@ import { CreateJobBody, GetJobParams } from "@workspace/api-zod";
|
||||
import { lnbitsService } from "../lib/lnbits.js";
|
||||
import { agentService } from "../lib/agent.js";
|
||||
import { pricingService } from "../lib/pricing.js";
|
||||
import { jobsLimiter } from "../lib/rate-limiter.js";
|
||||
import { eventBus } from "../lib/event-bus.js";
|
||||
import { streamRegistry } from "../lib/stream-registry.js";
|
||||
import { makeLogger } from "../lib/logger.js";
|
||||
import { latencyHistogram } from "../lib/histogram.js";
|
||||
|
||||
const logger = makeLogger("jobs");
|
||||
|
||||
const router = Router();
|
||||
|
||||
@@ -24,8 +31,18 @@ async function getInvoiceById(id: string) {
|
||||
* return immediately with "evaluating" state instead of blocking 5-8 seconds.
|
||||
*/
|
||||
async function runEvalInBackground(jobId: string, request: string): Promise<void> {
|
||||
const evalStart = Date.now();
|
||||
try {
|
||||
const evalResult = await agentService.evaluateRequest(request);
|
||||
latencyHistogram.record("eval_phase", Date.now() - evalStart);
|
||||
|
||||
logger.info("eval result", {
|
||||
jobId,
|
||||
accepted: evalResult.accepted,
|
||||
reason: evalResult.reason,
|
||||
inputTokens: evalResult.inputTokens,
|
||||
outputTokens: evalResult.outputTokens,
|
||||
});
|
||||
|
||||
if (evalResult.accepted) {
|
||||
const inputEst = pricingService.estimateInputTokens(request);
|
||||
@@ -65,11 +82,13 @@ async function runEvalInBackground(jobId: string, request: string): Promise<void
|
||||
})
|
||||
.where(eq(jobs.id, jobId));
|
||||
});
|
||||
eventBus.publish({ type: "job:state", jobId, state: "awaiting_work_payment" });
|
||||
} else {
|
||||
await db
|
||||
.update(jobs)
|
||||
.set({ state: "rejected", rejectionReason: evalResult.reason, updatedAt: new Date() })
|
||||
.where(eq(jobs.id, jobId));
|
||||
eventBus.publish({ type: "job:state", jobId, state: "rejected" });
|
||||
}
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Evaluation error";
|
||||
@@ -77,15 +96,25 @@ async function runEvalInBackground(jobId: string, request: string): Promise<void
|
||||
.update(jobs)
|
||||
.set({ state: "failed", errorMessage: message, updatedAt: new Date() })
|
||||
.where(eq(jobs.id, jobId));
|
||||
eventBus.publish({ type: "job:failed", jobId, reason: message });
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the AI work execution in a background task so HTTP polls return fast.
|
||||
* Uses streaming so any connected SSE client receives tokens in real time (#3).
|
||||
*/
|
||||
async function runWorkInBackground(jobId: string, request: string, workAmountSats: number, btcPriceUsd: number | null): Promise<void> {
|
||||
const workStart = Date.now();
|
||||
try {
|
||||
const workResult = await agentService.executeWork(request);
|
||||
eventBus.publish({ type: "job:state", jobId, state: "executing" });
|
||||
|
||||
const workResult = await agentService.executeWorkStreaming(request, (delta) => {
|
||||
streamRegistry.write(jobId, delta);
|
||||
});
|
||||
|
||||
streamRegistry.end(jobId);
|
||||
latencyHistogram.record("work_phase", Date.now() - workStart);
|
||||
|
||||
const actualCostUsd = pricingService.calculateActualCostUsd(
|
||||
workResult.inputTokens,
|
||||
@@ -112,12 +141,24 @@ async function runWorkInBackground(jobId: string, request: string, workAmountSat
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(jobs.id, jobId));
|
||||
|
||||
logger.info("work completed", {
|
||||
jobId,
|
||||
inputTokens: workResult.inputTokens,
|
||||
outputTokens: workResult.outputTokens,
|
||||
actualAmountSats,
|
||||
refundAmountSats,
|
||||
refundState,
|
||||
});
|
||||
eventBus.publish({ type: "job:completed", jobId, result: workResult.result });
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Execution error";
|
||||
streamRegistry.end(jobId);
|
||||
await db
|
||||
.update(jobs)
|
||||
.set({ state: "failed", errorMessage: message, updatedAt: new Date() })
|
||||
.where(eq(jobs.id, jobId));
|
||||
eventBus.publish({ type: "job:failed", jobId, reason: message });
|
||||
}
|
||||
}
|
||||
|
||||
@@ -149,6 +190,10 @@ async function advanceJob(job: Job): Promise<Job | null> {
|
||||
|
||||
if (!advanced) return getJobById(job.id);
|
||||
|
||||
logger.info("invoice paid", { jobId: job.id, invoiceType: "eval", paymentHash: evalInvoice.paymentHash });
|
||||
eventBus.publish({ type: "job:paid", jobId: job.id, invoiceType: "eval" });
|
||||
eventBus.publish({ type: "job:state", jobId: job.id, state: "evaluating" });
|
||||
|
||||
// Fire AI eval in background — poll returns immediately with "evaluating"
|
||||
setImmediate(() => { void runEvalInBackground(job.id, job.request); });
|
||||
|
||||
@@ -177,6 +222,12 @@ async function advanceJob(job: Job): Promise<Job | null> {
|
||||
|
||||
if (!advanced) return getJobById(job.id);
|
||||
|
||||
logger.info("invoice paid", { jobId: job.id, invoiceType: "work", paymentHash: workInvoice.paymentHash });
|
||||
eventBus.publish({ type: "job:paid", jobId: job.id, invoiceType: "work" });
|
||||
|
||||
// Register stream slot before firing background work so first tokens aren't lost
|
||||
streamRegistry.register(job.id);
|
||||
|
||||
// Fire AI work in background — poll returns immediately with "executing"
|
||||
setImmediate(() => { void runWorkInBackground(job.id, job.request, job.workAmountSats ?? 0, job.btcPriceUsd); });
|
||||
|
||||
@@ -188,7 +239,7 @@ async function advanceJob(job: Job): Promise<Job | null> {
|
||||
|
||||
// ── POST /jobs ────────────────────────────────────────────────────────────────
|
||||
|
||||
router.post("/jobs", async (req: Request, res: Response) => {
|
||||
router.post("/jobs", jobsLimiter, async (req: Request, res: Response) => {
|
||||
const parseResult = CreateJobBody.safeParse(req.body);
|
||||
if (!parseResult.success) {
|
||||
const issue = parseResult.error.issues[0];
|
||||
@@ -221,6 +272,8 @@ router.post("/jobs", async (req: Request, res: Response) => {
|
||||
await tx.update(jobs).set({ evalInvoiceId: invoiceId, updatedAt: new Date() }).where(eq(jobs.id, jobId));
|
||||
});
|
||||
|
||||
logger.info("job created", { jobId, evalAmountSats: evalFee, stubMode: lnbitsService.stubMode });
|
||||
|
||||
res.status(201).json({
|
||||
jobId,
|
||||
evalInvoice: {
|
||||
@@ -231,6 +284,7 @@ router.post("/jobs", async (req: Request, res: Response) => {
|
||||
});
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : "Failed to create job";
|
||||
logger.error("job creation failed", { error: message });
|
||||
res.status(500).json({ error: message });
|
||||
}
|
||||
});
|
||||
@@ -404,4 +458,130 @@ router.post("/jobs/:id/refund", async (req: Request, res: Response) => {
|
||||
}
|
||||
});
|
||||
|
||||
// ── GET /jobs/:id/stream ──────────────────────────────────────────────────────
|
||||
// Server-Sent Events (#3): streams Claude token deltas in real time while the
|
||||
// job is executing. If the job is already complete, sends the full result then
|
||||
// closes. If the job isn't executing yet, waits up to 90 s for it to start.
|
||||
|
||||
/**
 * GET /jobs/:id/stream — Server-Sent Events feed for a single job.
 *
 * Events emitted:
 *   - `token` { text }            a chunk of output (or the full stored result on replay)
 *   - `done`  { jobId, state }    the stream finished
 *   - `error` { jobId, message }  the job failed, was rejected, or the wait timed out
 *
 * If no live stream is registered yet, the handler waits up to 90 s. While
 * waiting, events for this job on the bus are used only as a signal to
 * re-check the stream registry and the DB: the wait ends when a stream slot
 * appears, the job reaches a terminal state (complete / failed / rejected),
 * the client disconnects, or the deadline passes. Intermediate events such as
 * "eval invoice paid" no longer end the wait early.
 */
router.get("/jobs/:id/stream", async (req: Request, res: Response) => {
  const STREAM_WAIT_TIMEOUT_MS = 90_000; // generous enough for slow payment confirmations on mainnet
  const HEARTBEAT_INTERVAL_MS = 15_000;

  const paramResult = GetJobParams.safeParse(req.params);
  if (!paramResult.success) {
    res.status(400).json({ error: "Invalid job id" });
    return;
  }
  const { id } = paramResult.data;

  const job = await getJobById(id);
  if (!job) {
    res.status(404).json({ error: "Job not found" });
    return;
  }

  res.setHeader("Content-Type", "text/event-stream");
  res.setHeader("Cache-Control", "no-cache");
  res.setHeader("Connection", "keep-alive");
  res.setHeader("X-Accel-Buffering", "no");
  res.flushHeaders();

  // Set once the response is closed (client went away or we ended it).
  let clientGone = false;
  // Resolver of the wait currently in progress, if any; lets events cut the sleep short.
  let wakeWaiter: (() => void) | null = null;

  const sendEvent = (event: string, data: unknown) => {
    // Never write to a response that has already been ended or closed.
    if (clientGone || res.writableEnded) return;
    res.write(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`);
  };

  const isTerminalState = (state: string): boolean =>
    state === "complete" || state === "failed" || state === "rejected";

  // Job already complete — replay full result immediately
  if (job.state === "complete" && job.result) {
    sendEvent("token", { text: job.result });
    sendEvent("done", { jobId: id, state: "complete" });
    res.end();
    return;
  }

  if (job.state === "failed") {
    sendEvent("error", { jobId: id, message: job.errorMessage ?? "Job failed" });
    res.end();
    return;
  }

  // A rejected job will never produce a stream; say so instead of making the client wait.
  if (job.state === "rejected") {
    sendEvent("error", { jobId: id, message: job.rejectionReason ?? "Job rejected" });
    res.end();
    return;
  }

  // Job is executing or about to execute — keep the connection alive with comment frames
  const sendHeartbeat = setInterval(() => {
    if (!clientGone && !res.writableEnded) res.write(": heartbeat\n\n");
  }, HEARTBEAT_INTERVAL_MS);

  const cleanup = () => {
    clearInterval(sendHeartbeat);
  };

  // End the response (if still open) and release the heartbeat timer.
  const finish = () => {
    if (!res.writableEnded) res.end();
    cleanup();
  };

  req.on("close", cleanup);
  // The response's own close event marks the connection as gone, so a pending
  // wait is released immediately instead of lingering until the deadline.
  res.on("close", () => {
    clientGone = true;
    cleanup();
    wakeWaiter?.();
  });

  // ── Wait for stream slot ──────────────────────────────────────────────────
  let stream = streamRegistry.get(id);
  let currentJob = job;

  if (!stream) {
    const deadlineAt = Date.now() + STREAM_WAIT_TIMEOUT_MS;
    // True when at least one event for this job arrived since the last DB re-check.
    let eventPending = false;

    const busListener = (data: Parameters<typeof eventBus.publish>[0]) => {
      if ("jobId" in data && data.jobId === id) {
        eventPending = true;
        wakeWaiter?.();
      }
    };
    // The listener stays attached for the whole wait so events that fire
    // during a DB round-trip are not missed.
    eventBus.on("bus", busListener);

    try {
      while (!clientGone) {
        stream = streamRegistry.get(id);
        if (stream) break;

        const remainingMs = deadlineAt - Date.now();
        const timedOut = remainingMs <= 0;

        if (eventPending || timedOut) {
          eventPending = false;
          currentJob = (await getJobById(id)) ?? currentJob;
          if (timedOut || isTerminalState(currentJob.state)) break;
          continue; // non-terminal progress (e.g. eval paid) — keep waiting
        }

        await new Promise<void>((resolve) => {
          const timer = setTimeout(resolve, remainingMs);
          wakeWaiter = () => {
            clearTimeout(timer);
            resolve();
          };
        });
        wakeWaiter = null;
      }
    } finally {
      eventBus.off("bus", busListener);
    }

    // The slot may have been registered while the last DB read was in flight.
    stream = stream ?? streamRegistry.get(id);
  }

  // Client disconnected while we were waiting — nothing left to send.
  if (clientGone) {
    cleanup();
    return;
  }

  // ── Resolve: stream available ─────────────────────────────────────────────
  if (stream) {
    const liveStream = stream;
    liveStream.on("data", (chunk: Buffer) => {
      sendEvent("token", { text: chunk.toString("utf8") });
    });
    // NOTE(review): "end" is reported as state "complete" unconditionally; if
    // the registry also ends the stream when execution fails, the client would
    // see "done" for a failed job — confirm against the producer.
    liveStream.on("end", () => {
      sendEvent("done", { jobId: id, state: "complete" });
      finish();
    });
    liveStream.on("error", (err: Error) => {
      sendEvent("error", { jobId: id, message: err.message });
      finish();
    });
    return;
  }

  // ── Resolve: job completed while we waited (stream already gone) ──────────
  if (currentJob.state === "complete" && currentJob.result) {
    sendEvent("token", { text: currentJob.result });
    sendEvent("done", { jobId: id, state: "complete" });
    finish();
    return;
  }

  if (currentJob.state === "failed") {
    sendEvent("error", { jobId: id, message: currentJob.errorMessage ?? "Job failed" });
    finish();
    return;
  }

  if (currentJob.state === "rejected") {
    sendEvent("error", { jobId: id, message: currentJob.rejectionReason ?? "Job rejected" });
    finish();
    return;
  }

  // ── Resolve: timeout with no activity — tell client to fall back to polling
  sendEvent("error", {
    jobId: id,
    message: "Stream timed out. Poll GET /api/jobs/:id for current state.",
  });
  finish();
});
|
||||
|
||||
export default router;
|
||||
|
||||
19
artifacts/api-server/src/routes/metrics.ts
Normal file
19
artifacts/api-server/src/routes/metrics.ts
Normal file
@@ -0,0 +1,19 @@
|
||||
import { Router, type Request, type Response } from "express";
|
||||
import { metricsService } from "../lib/metrics.js";
|
||||
import { makeLogger } from "../lib/logger.js";
|
||||
|
||||
const router = Router();
|
||||
const logger = makeLogger("metrics");
|
||||
|
||||
// GET /metrics — responds with the snapshot produced by metricsService as JSON.
// If collecting or serialising the snapshot throws, logs the failure and
// answers 500 with the error text.
router.get("/metrics", async (_req: Request, res: Response) => {
  try {
    res.json(await metricsService.snapshot());
  } catch (failure) {
    let reason = "Failed to collect metrics";
    if (failure instanceof Error) {
      reason = failure.message;
    }
    logger.error("snapshot failed", { error: reason });
    res.status(500).json({ error: reason });
  }
});
|
||||
|
||||
export default router;
|
||||
@@ -3,6 +3,8 @@ import { randomBytes, randomUUID } from "crypto";
|
||||
import { db, sessions, sessionRequests, type Session } from "@workspace/db";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
import { lnbitsService } from "../lib/lnbits.js";
|
||||
import { sessionsLimiter } from "../lib/rate-limiter.js";
|
||||
import { eventBus } from "../lib/event-bus.js";
|
||||
import { agentService } from "../lib/agent.js";
|
||||
import { pricingService } from "../lib/pricing.js";
|
||||
import { getBtcPriceUsd, usdToSats } from "../lib/btc-oracle.js";
|
||||
@@ -133,7 +135,7 @@ async function advanceTopup(session: Session): Promise<Session> {
|
||||
|
||||
// ── POST /sessions ─────────────────────────────────────────────────────────────
|
||||
|
||||
router.post("/sessions", async (req: Request, res: Response) => {
|
||||
router.post("/sessions", sessionsLimiter, async (req: Request, res: Response) => {
|
||||
const rawAmount = req.body?.amount_sats;
|
||||
const amountSats = parseInt(String(rawAmount ?? ""), 10);
|
||||
|
||||
@@ -220,7 +222,7 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
|
||||
}
|
||||
|
||||
try {
|
||||
let session = await getSessionById(id);
|
||||
const session = await getSessionById(id);
|
||||
if (!session) { res.status(404).json({ error: "Session not found" }); return; }
|
||||
|
||||
// Auth
|
||||
|
||||
@@ -9,6 +9,8 @@ const router = Router();
|
||||
* BASE URL. Agents and testers can run the full test suite with one command:
|
||||
*
|
||||
* curl -s https://your-url.replit.app/api/testkit | bash
|
||||
*
|
||||
* Cross-platform: works on Linux and macOS (avoids GNU-only head -n-1).
|
||||
*/
|
||||
router.get("/testkit", (req: Request, res: Response) => {
|
||||
const proto =
|
||||
@@ -31,16 +33,17 @@ FAIL=0
|
||||
SKIP=0
|
||||
|
||||
note() { echo " [\$1] \$2"; }
|
||||
jq_field() { echo "\$1" | jq -r "\$2" 2>/dev/null || echo ""; }
|
||||
sep() { echo; echo "=== $* ==="; }
|
||||
sep() { echo; echo "=== $* ==="; }
|
||||
# body_of: strip last line (HTTP status code) — works on GNU and BSD (macOS)
|
||||
body_of() { echo "\$1" | sed '$d'; }
|
||||
code_of() { echo "\$1" | tail -n1; }
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1 — Health check
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 1 — Health check"
|
||||
T1_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/healthz")
|
||||
T1_BODY=$(echo "$T1_RES" | head -n-1)
|
||||
T1_CODE=$(echo "$T1_RES" | tail -n1)
|
||||
T1_BODY=$(body_of "$T1_RES"); T1_CODE=$(code_of "$T1_RES")
|
||||
if [[ "$T1_CODE" == "200" ]] && [[ "$(echo "$T1_BODY" | jq -r '.status' 2>/dev/null)" == "ok" ]]; then
|
||||
note PASS "HTTP 200, status=ok"
|
||||
PASS=$((PASS+1))
|
||||
@@ -56,8 +59,7 @@ sep "Test 2 — Create job"
|
||||
T2_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"request":"Explain the Lightning Network in two sentences"}')
|
||||
T2_BODY=$(echo "$T2_RES" | head -n-1)
|
||||
T2_CODE=$(echo "$T2_RES" | tail -n1)
|
||||
T2_BODY=$(body_of "$T2_RES"); T2_CODE=$(code_of "$T2_RES")
|
||||
JOB_ID=$(echo "$T2_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
|
||||
EVAL_AMT=$(echo "$T2_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
|
||||
if [[ "$T2_CODE" == "201" && -n "$JOB_ID" && "$EVAL_AMT" == "10" ]]; then
|
||||
@@ -73,8 +75,7 @@ fi
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 3 — Poll before payment"
|
||||
T3_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
|
||||
T3_BODY=$(echo "$T3_RES" | head -n-1)
|
||||
T3_CODE=$(echo "$T3_RES" | tail -n1)
|
||||
T3_BODY=$(body_of "$T3_RES"); T3_CODE=$(code_of "$T3_RES")
|
||||
STATE_T3=$(echo "$T3_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
EVAL_AMT_ECHO=$(echo "$T3_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
|
||||
EVAL_HASH=$(echo "$T3_BODY" | jq -r '.evalInvoice.paymentHash' 2>/dev/null || echo "")
|
||||
@@ -99,8 +100,7 @@ fi
|
||||
sep "Test 4 — Pay eval invoice (stub)"
|
||||
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
|
||||
T4_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$EVAL_HASH")
|
||||
T4_BODY=$(echo "$T4_RES" | head -n-1)
|
||||
T4_CODE=$(echo "$T4_RES" | tail -n1)
|
||||
T4_BODY=$(body_of "$T4_RES"); T4_CODE=$(code_of "$T4_RES")
|
||||
if [[ "$T4_CODE" == "200" ]] && [[ "$(echo "$T4_BODY" | jq -r '.ok' 2>/dev/null)" == "true" ]]; then
|
||||
note PASS "Eval invoice marked paid"
|
||||
PASS=$((PASS+1))
|
||||
@@ -114,25 +114,32 @@ else
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5 — Poll after eval payment
|
||||
# Test 5 — Poll after eval payment (with retry loop — real AI eval takes 2–5 s)
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 5 — Poll after eval (state advance)"
|
||||
sleep 2
|
||||
T5_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
|
||||
T5_BODY=$(echo "$T5_RES" | head -n-1)
|
||||
T5_CODE=$(echo "$T5_RES" | tail -n1)
|
||||
STATE_T5=$(echo "$T5_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
WORK_AMT=$(echo "$T5_BODY" | jq -r '.workInvoice.amountSats' 2>/dev/null || echo "")
|
||||
WORK_HASH=$(echo "$T5_BODY" | jq -r '.workInvoice.paymentHash' 2>/dev/null || echo "")
|
||||
START_T5=$(date +%s)
|
||||
T5_TIMEOUT=30
|
||||
STATE_T5=""; WORK_AMT=""; WORK_HASH=""; T5_BODY=""; T5_CODE=""
|
||||
while :; do
|
||||
T5_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
|
||||
T5_BODY=$(body_of "$T5_RES"); T5_CODE=$(code_of "$T5_RES")
|
||||
STATE_T5=$(echo "$T5_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
WORK_AMT=$(echo "$T5_BODY" | jq -r '.workInvoice.amountSats' 2>/dev/null || echo "")
|
||||
WORK_HASH=$(echo "$T5_BODY" | jq -r '.workInvoice.paymentHash' 2>/dev/null || echo "")
|
||||
NOW_T5=$(date +%s); ELAPSED_T5=$((NOW_T5 - START_T5))
|
||||
if [[ "$STATE_T5" == "awaiting_work_payment" || "$STATE_T5" == "rejected" ]]; then break; fi
|
||||
if (( ELAPSED_T5 > T5_TIMEOUT )); then break; fi
|
||||
sleep 2
|
||||
done
|
||||
if [[ "$T5_CODE" == "200" && "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_AMT" && "$WORK_AMT" != "null" ]]; then
|
||||
note PASS "state=awaiting_work_payment, workInvoice.amountSats=$WORK_AMT"
|
||||
note PASS "state=awaiting_work_payment in $ELAPSED_T5 s, workInvoice.amountSats=$WORK_AMT"
|
||||
PASS=$((PASS+1))
|
||||
elif [[ "$T5_CODE" == "200" && "$STATE_T5" == "rejected" ]]; then
|
||||
note PASS "Request correctly rejected by agent after eval"
|
||||
note PASS "Request correctly rejected by agent after eval (in $ELAPSED_T5 s)"
|
||||
PASS=$((PASS+1))
|
||||
WORK_HASH=""
|
||||
else
|
||||
note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY"
|
||||
note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY (after $ELAPSED_T5 s)"
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
@@ -142,8 +149,7 @@ fi
|
||||
sep "Test 6 — Pay work invoice + get result"
|
||||
if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH" != "null" ]]; then
|
||||
T6_PAY_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$WORK_HASH")
|
||||
T6_PAY_BODY=$(echo "$T6_PAY_RES" | head -n-1)
|
||||
T6_PAY_CODE=$(echo "$T6_PAY_RES" | tail -n1)
|
||||
T6_PAY_BODY=$(body_of "$T6_PAY_RES"); T6_PAY_CODE=$(code_of "$T6_PAY_RES")
|
||||
if [[ "$T6_PAY_CODE" != "200" ]] || [[ "$(echo "$T6_PAY_BODY" | jq -r '.ok' 2>/dev/null)" != "true" ]]; then
|
||||
note FAIL "Work payment stub failed: code=$T6_PAY_CODE body=$T6_PAY_BODY"
|
||||
FAIL=$((FAIL+1))
|
||||
@@ -152,11 +158,10 @@ if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH"
|
||||
TIMEOUT=30
|
||||
while :; do
|
||||
T6_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
|
||||
T6_BODY=$(echo "$T6_RES" | head -n-1)
|
||||
T6_BODY=$(body_of "$T6_RES")
|
||||
STATE_T6=$(echo "$T6_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
RESULT_T6=$(echo "$T6_BODY" | jq -r '.result' 2>/dev/null || echo "")
|
||||
NOW_TS=$(date +%s)
|
||||
ELAPSED=$((NOW_TS - START_TS))
|
||||
NOW_TS=$(date +%s); ELAPSED=$((NOW_TS - START_TS))
|
||||
if [[ "$STATE_T6" == "complete" && -n "$RESULT_T6" && "$RESULT_T6" != "null" ]]; then
|
||||
note PASS "state=complete in $ELAPSED s"
|
||||
echo " Result: \${RESULT_T6:0:200}..."
|
||||
@@ -177,33 +182,13 @@ else
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7 — Demo endpoint
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 7 — Demo endpoint"
|
||||
START_DEMO=$(date +%s)
|
||||
T7_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi")
|
||||
T7_BODY=$(echo "$T7_RES" | head -n-1)
|
||||
T7_CODE=$(echo "$T7_RES" | tail -n1)
|
||||
END_DEMO=$(date +%s)
|
||||
ELAPSED_DEMO=$((END_DEMO - START_DEMO))
|
||||
RESULT_T7=$(echo "$T7_BODY" | jq -r '.result' 2>/dev/null || echo "")
|
||||
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
|
||||
note PASS "HTTP 200, result in $ELAPSED_DEMO s"
|
||||
echo " Result: \${RESULT_T7:0:200}..."
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
note FAIL "code=$T7_CODE body=$T7_BODY"
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 8 — Input validation (4 sub-cases)
|
||||
# Test 8 — Input validation (run BEFORE test 7 to avoid rate-limit interference)
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 8 — Input validation"
|
||||
|
||||
T8A_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
|
||||
-H "Content-Type: application/json" -d '{}')
|
||||
T8A_BODY=$(echo "$T8A_RES" | head -n-1); T8A_CODE=$(echo "$T8A_RES" | tail -n1)
|
||||
T8A_BODY=$(body_of "$T8A_RES"); T8A_CODE=$(code_of "$T8A_RES")
|
||||
if [[ "$T8A_CODE" == "400" && -n "$(echo "$T8A_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
|
||||
note PASS "8a: Missing request body → HTTP 400"
|
||||
PASS=$((PASS+1))
|
||||
@@ -213,7 +198,7 @@ else
|
||||
fi
|
||||
|
||||
T8B_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/does-not-exist")
|
||||
T8B_BODY=$(echo "$T8B_RES" | head -n-1); T8B_CODE=$(echo "$T8B_RES" | tail -n1)
|
||||
T8B_BODY=$(body_of "$T8B_RES"); T8B_CODE=$(code_of "$T8B_RES")
|
||||
if [[ "$T8B_CODE" == "404" && -n "$(echo "$T8B_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
|
||||
note PASS "8b: Unknown job ID → HTTP 404"
|
||||
PASS=$((PASS+1))
|
||||
@@ -222,8 +207,9 @@ else
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
# 8c runs here — before tests 7 and 9 consume rate-limit quota
|
||||
T8C_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo")
|
||||
T8C_BODY=$(echo "$T8C_RES" | head -n-1); T8C_CODE=$(echo "$T8C_RES" | tail -n1)
|
||||
T8C_BODY=$(body_of "$T8C_RES"); T8C_CODE=$(code_of "$T8C_RES")
|
||||
if [[ "$T8C_CODE" == "400" && -n "$(echo "$T8C_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
|
||||
note PASS "8c: Demo missing param → HTTP 400"
|
||||
PASS=$((PASS+1))
|
||||
@@ -236,7 +222,7 @@ LONG_STR=$(node -e "process.stdout.write('x'.repeat(501))" 2>/dev/null || python
|
||||
T8D_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d "{\\"request\\":\\"$LONG_STR\\"}")
|
||||
T8D_BODY=$(echo "$T8D_RES" | head -n-1); T8D_CODE=$(echo "$T8D_RES" | tail -n1)
|
||||
T8D_BODY=$(body_of "$T8D_RES"); T8D_CODE=$(code_of "$T8D_RES")
|
||||
T8D_ERR=$(echo "$T8D_BODY" | jq -r '.error' 2>/dev/null || echo "")
|
||||
if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
|
||||
note PASS "8d: 501-char request → HTTP 400 with character limit error"
|
||||
@@ -247,13 +233,31 @@ else
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9 — Demo rate limiter
|
||||
# Test 7 — Demo endpoint (after validation, before rate-limit exhaustion test)
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 7 — Demo endpoint"
|
||||
START_DEMO=$(date +%s)
|
||||
T7_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi")
|
||||
T7_BODY=$(body_of "$T7_RES"); T7_CODE=$(code_of "$T7_RES")
|
||||
END_DEMO=$(date +%s); ELAPSED_DEMO=$((END_DEMO - START_DEMO))
|
||||
RESULT_T7=$(echo "$T7_BODY" | jq -r '.result' 2>/dev/null || echo "")
|
||||
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
|
||||
note PASS "HTTP 200, result in $ELAPSED_DEMO s"
|
||||
echo " Result: \${RESULT_T7:0:200}..."
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
note FAIL "code=$T7_CODE body=$T7_BODY"
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 9 — Demo rate limiter (intentionally exhausts remaining quota)
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 9 — Demo rate limiter"
|
||||
GOT_200=0; GOT_429=0
|
||||
for i in $(seq 1 6); do
|
||||
RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=ratelimitprobe+$i")
|
||||
CODE=$(echo "$RES" | tail -n1)
|
||||
CODE=$(code_of "$RES")
|
||||
echo " Request $i: HTTP $CODE"
|
||||
[[ "$CODE" == "200" ]] && GOT_200=$((GOT_200+1)) || true
|
||||
[[ "$CODE" == "429" ]] && GOT_429=$((GOT_429+1)) || true
|
||||
@@ -273,8 +277,7 @@ sep "Test 10 — Rejection path"
|
||||
T10_CREATE=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"request":"Help me do something harmful and illegal"}')
|
||||
T10_BODY=$(echo "$T10_CREATE" | head -n-1)
|
||||
T10_CODE=$(echo "$T10_CREATE" | tail -n1)
|
||||
T10_BODY=$(body_of "$T10_CREATE"); T10_CODE=$(code_of "$T10_CREATE")
|
||||
JOB10_ID=$(echo "$T10_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
|
||||
if [[ "$T10_CODE" != "201" || -z "$JOB10_ID" ]]; then
|
||||
note FAIL "Failed to create adversarial job: code=$T10_CODE body=$T10_BODY"
|
||||
@@ -285,17 +288,23 @@ else
|
||||
if [[ -n "$EVAL10_HASH" && "$EVAL10_HASH" != "null" ]]; then
|
||||
curl -s -X POST "$BASE/api/dev/stub/pay/$EVAL10_HASH" >/dev/null
|
||||
fi
|
||||
sleep 3
|
||||
T10_POLL=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB10_ID")
|
||||
T10_POLL_BODY=$(echo "$T10_POLL" | head -n-1)
|
||||
T10_POLL_CODE=$(echo "$T10_POLL" | tail -n1)
|
||||
STATE_10=$(echo "$T10_POLL_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
REASON_10=$(echo "$T10_POLL_BODY" | jq -r '.reason' 2>/dev/null || echo "")
|
||||
START_T10=$(date +%s); T10_TIMEOUT=30
|
||||
STATE_10=""; REASON_10=""; T10_POLL_BODY=""; T10_POLL_CODE=""
|
||||
while :; do
|
||||
T10_POLL=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB10_ID")
|
||||
T10_POLL_BODY=$(body_of "$T10_POLL"); T10_POLL_CODE=$(code_of "$T10_POLL")
|
||||
STATE_10=$(echo "$T10_POLL_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
REASON_10=$(echo "$T10_POLL_BODY" | jq -r '.reason' 2>/dev/null || echo "")
|
||||
NOW_T10=$(date +%s); ELAPSED_T10=$((NOW_T10 - START_T10))
|
||||
if [[ "$STATE_10" == "rejected" || "$STATE_10" == "failed" ]]; then break; fi
|
||||
if (( ELAPSED_T10 > T10_TIMEOUT )); then break; fi
|
||||
sleep 2
|
||||
done
|
||||
if [[ "$T10_POLL_CODE" == "200" && "$STATE_10" == "rejected" && -n "$REASON_10" && "$REASON_10" != "null" ]]; then
|
||||
note PASS "state=rejected, reason: \${REASON_10:0:120}"
|
||||
note PASS "state=rejected in $ELAPSED_T10 s, reason: \${REASON_10:0:120}"
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY"
|
||||
note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY (after $ELAPSED_T10 s)"
|
||||
FAIL=$((FAIL+1))
|
||||
fi
|
||||
fi
|
||||
@@ -307,8 +316,7 @@ sep "Test 11 — Session: create session (awaiting_payment)"
|
||||
T11_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/sessions" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"amount_sats": 200}')
|
||||
T11_BODY=$(echo "$T11_RES" | head -n-1)
|
||||
T11_CODE=$(echo "$T11_RES" | tail -n1)
|
||||
T11_BODY=$(body_of "$T11_RES"); T11_CODE=$(code_of "$T11_RES")
|
||||
SESSION_ID=$(echo "$T11_BODY" | jq -r '.sessionId' 2>/dev/null || echo "")
|
||||
T11_STATE=$(echo "$T11_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
T11_AMT=$(echo "$T11_BODY" | jq -r '.invoice.amountSats' 2>/dev/null || echo "")
|
||||
@@ -322,12 +330,11 @@ else
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 12 — Session: poll before payment (stub hash present)
|
||||
# Test 12 — Session: poll before payment
|
||||
# ---------------------------------------------------------------------------
|
||||
sep "Test 12 — Session: poll before payment"
|
||||
T12_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/sessions/$SESSION_ID")
|
||||
T12_BODY=$(echo "$T12_RES" | head -n-1)
|
||||
T12_CODE=$(echo "$T12_RES" | tail -n1)
|
||||
T12_BODY=$(body_of "$T12_RES"); T12_CODE=$(code_of "$T12_RES")
|
||||
T12_STATE=$(echo "$T12_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
if [[ -z "$DEPOSIT_HASH" || "$DEPOSIT_HASH" == "null" ]]; then
|
||||
DEPOSIT_HASH=$(echo "$T12_BODY" | jq -r '.invoice.paymentHash' 2>/dev/null || echo "")
|
||||
@@ -348,8 +355,7 @@ if [[ -n "$DEPOSIT_HASH" && "$DEPOSIT_HASH" != "null" ]]; then
|
||||
curl -s -X POST "$BASE/api/dev/stub/pay/$DEPOSIT_HASH" >/dev/null
|
||||
sleep 1
|
||||
T13_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/sessions/$SESSION_ID")
|
||||
T13_BODY=$(echo "$T13_RES" | head -n-1)
|
||||
T13_CODE=$(echo "$T13_RES" | tail -n1)
|
||||
T13_BODY=$(body_of "$T13_RES"); T13_CODE=$(code_of "$T13_RES")
|
||||
T13_STATE=$(echo "$T13_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
T13_BAL=$(echo "$T13_BODY" | jq -r '.balanceSats' 2>/dev/null || echo "")
|
||||
SESSION_MACAROON=$(echo "$T13_BODY" | jq -r '.macaroon' 2>/dev/null || echo "")
|
||||
@@ -375,15 +381,13 @@ if [[ -n "$SESSION_MACAROON" && "$SESSION_MACAROON" != "null" ]]; then
|
||||
-H "Content-Type: application/json" \\
|
||||
-H "Authorization: Bearer $SESSION_MACAROON" \\
|
||||
-d '{"request":"What is Bitcoin in one sentence?"}')
|
||||
T14_BODY=$(echo "$T14_RES" | head -n-1)
|
||||
T14_CODE=$(echo "$T14_RES" | tail -n1)
|
||||
T14_BODY=$(body_of "$T14_RES"); T14_CODE=$(code_of "$T14_RES")
|
||||
T14_STATE=$(echo "$T14_BODY" | jq -r '.state' 2>/dev/null || echo "")
|
||||
T14_DEBITED=$(echo "$T14_BODY" | jq -r '.debitedSats' 2>/dev/null || echo "")
|
||||
T14_BAL=$(echo "$T14_BODY" | jq -r '.balanceRemaining' 2>/dev/null || echo "")
|
||||
END_T14=$(date +%s)
|
||||
ELAPSED_T14=$((END_T14 - START_T14))
|
||||
END_T14=$(date +%s); ELAPSED_T14=$((END_T14 - START_T14))
|
||||
if [[ "$T14_CODE" == "200" && ("$T14_STATE" == "complete" || "$T14_STATE" == "rejected") && -n "$T14_DEBITED" && "$T14_DEBITED" != "null" && -n "$T14_BAL" ]]; then
|
||||
note PASS "state=$T14_STATE in ${ELAPSED_T14}s, debitedSats=$T14_DEBITED, balanceRemaining=$T14_BAL"
|
||||
note PASS "state=$T14_STATE in \${ELAPSED_T14}s, debitedSats=$T14_DEBITED, balanceRemaining=$T14_BAL"
|
||||
PASS=$((PASS+1))
|
||||
else
|
||||
note FAIL "code=$T14_CODE body=$T14_BODY"
|
||||
@@ -402,7 +406,7 @@ if [[ -n "$SESSION_ID" ]]; then
|
||||
T15_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/sessions/$SESSION_ID/request" \\
|
||||
-H "Content-Type: application/json" \\
|
||||
-d '{"request":"What is Bitcoin?"}')
|
||||
T15_CODE=$(echo "$T15_RES" | tail -n1)
|
||||
T15_CODE=$(code_of "$T15_RES")
|
||||
if [[ "$T15_CODE" == "401" ]]; then
|
||||
note PASS "HTTP 401 without macaroon"
|
||||
PASS=$((PASS+1))
|
||||
@@ -424,8 +428,7 @@ if [[ -n "$SESSION_MACAROON" && "$SESSION_MACAROON" != "null" ]]; then
|
||||
-H "Content-Type: application/json" \\
|
||||
-H "Authorization: Bearer $SESSION_MACAROON" \\
|
||||
-d '{"amount_sats": 500}')
|
||||
T16_BODY=$(echo "$T16_RES" | head -n-1)
|
||||
T16_CODE=$(echo "$T16_RES" | tail -n1)
|
||||
T16_BODY=$(body_of "$T16_RES"); T16_CODE=$(code_of "$T16_RES")
|
||||
T16_PR=$(echo "$T16_BODY" | jq -r '.topup.paymentRequest' 2>/dev/null || echo "")
|
||||
T16_AMT=$(echo "$T16_BODY" | jq -r '.topup.amountSats' 2>/dev/null || echo "")
|
||||
if [[ "$T16_CODE" == "200" && -n "$T16_PR" && "$T16_PR" != "null" && "$T16_AMT" == "500" ]]; then
|
||||
|
||||
Reference in New Issue
Block a user