diff --git a/artifacts/api-server/src/routes/index.ts b/artifacts/api-server/src/routes/index.ts
index fbe097f..f0b4773 100644
--- a/artifacts/api-server/src/routes/index.ts
+++ b/artifacts/api-server/src/routes/index.ts
@@ -3,12 +3,14 @@ import healthRouter from "./health.js";
 import jobsRouter from "./jobs.js";
 import demoRouter from "./demo.js";
 import devRouter from "./dev.js";
+import testkitRouter from "./testkit.js";
 
 const router: IRouter = Router();
 
 router.use(healthRouter);
 router.use(jobsRouter);
 router.use(demoRouter);
+router.use(testkitRouter);
 
 if (process.env.NODE_ENV !== "production") {
   router.use(devRouter);
diff --git a/artifacts/api-server/src/routes/testkit.ts b/artifacts/api-server/src/routes/testkit.ts
new file mode 100644
index 0000000..7dfc0e8
--- /dev/null
+++ b/artifacts/api-server/src/routes/testkit.ts
@@ -0,0 +1,362 @@
+import { Router, type Request, type Response } from "express";
+
+const router = Router();
+
+/**
+ * Hosts that are safe to embed in the generated script: a DNS name, IPv4
+ * address or bracketed IPv6 literal, optionally followed by a port. Quotes,
+ * "$", backticks and whitespace never match, so the value cannot break out of
+ * the double-quoted BASE assignment.
+ */
+const SAFE_HOST =
+  /^(?:[A-Za-z0-9_](?:[A-Za-z0-9._-]*[A-Za-z0-9_])?|\[[0-9A-Fa-f:.]+\])(?::\d{1,5})?$/;
+
+/**
+ * Returns the first non-empty entry of a header that may be repeated
+ * (string[]) or comma-separated (one entry per proxy hop).
+ */
+function firstHeaderValue(value: string | string[] | undefined): string | undefined {
+  const raw = Array.isArray(value) ? value[0] : value;
+  const first = raw?.split(",")[0]?.trim();
+  return first ? first : undefined;
+}
+
+/**
+ * Derives this server's public origin from the forwarding headers, falling
+ * back to "https" and req.hostname when they are absent.
+ *
+ * @returns the origin, or undefined when the scheme is not http(s) or the host
+ * contains characters that are unsafe to interpolate into a shell script.
+ */
+function resolveBaseUrl(req: Request): string | undefined {
+  const proto = (firstHeaderValue(req.headers["x-forwarded-proto"]) ?? "https").toLowerCase();
+  const host = firstHeaderValue(req.headers["x-forwarded-host"]) ?? req.hostname;
+  if ((proto !== "http" && proto !== "https") || !host || !SAFE_HOST.test(host)) {
+    return undefined;
+  }
+  return `${proto}://${host}`;
+}
+
+/**
+ * GET /api/testkit
+ *
+ * Returns a self-contained bash script pre-configured with this server's
+ * BASE URL. Agents and testers can run the full test suite with one command:
+ *
+ *   curl -s https://your-url.replit.app/api/testkit | bash
+ *
+ * The script splits curl's output with sed '$d' rather than the GNU-only
+ * "head -n-1" so that it also runs on BSD userlands such as macOS.
+ *
+ * When no safe base URL can be derived the response is HTTP 400 with a script
+ * that only prints the problem and exits 1, so piping it to bash stays harmless.
+ */
+router.get("/testkit", (req: Request, res: Response) => {
+  const base = resolveBaseUrl(req);
+  if (base === undefined) {
+    res.status(400);
+    res.setHeader("Content-Type", "text/x-shellscript; charset=utf-8");
+    res.send('echo "testkit: could not derive a safe base URL from the request" >&2\nexit 1\n');
+    return;
+  }
+
+  const script = `#!/usr/bin/env bash
+set -euo pipefail
+
+BASE="${base}"
+
+echo "Timmy Test Kit"
+echo "Target: $BASE"
+echo "$(date)"
+echo
+
+PASS=0
+FAIL=0
+SKIP=0
+
+note() { echo " [\$1] \$2"; }
+jq_field() { echo "\$1" | jq -r "\$2" 2>/dev/null || echo ""; }
+sep() { echo; echo "=== $* ==="; }
+
+# ---------------------------------------------------------------------------
+# Test 1 — Health check
+# ---------------------------------------------------------------------------
+sep "Test 1 — Health check"
+T1_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/healthz")
+T1_BODY=$(echo "$T1_RES" | sed '$d')
+T1_CODE=$(echo "$T1_RES" | tail -n1)
+if [[ "$T1_CODE" == "200" ]] && [[ "$(echo "$T1_BODY" | jq -r '.status' 2>/dev/null)" == "ok" ]]; then
+  note PASS "HTTP 200, status=ok"
+  PASS=$((PASS+1))
+else
+  note FAIL "code=$T1_CODE body=$T1_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 2 — Create job
+# ---------------------------------------------------------------------------
+sep "Test 2 — Create job"
+T2_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
+  -H "Content-Type: application/json" \\
+  -d '{"request":"Explain the Lightning Network in two sentences"}')
+T2_BODY=$(echo "$T2_RES" | sed '$d')
+T2_CODE=$(echo "$T2_RES" | tail -n1)
+JOB_ID=$(echo "$T2_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
+EVAL_AMT=$(echo "$T2_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
+if [[ "$T2_CODE" == "201" && -n "$JOB_ID" && "$EVAL_AMT" == "10" ]]; then
+  note PASS "HTTP 201, jobId=$JOB_ID, evalInvoice.amountSats=10"
+  PASS=$((PASS+1))
+else
+  note FAIL "code=$T2_CODE body=$T2_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 3 — Poll before payment
+# ---------------------------------------------------------------------------
+sep "Test 3 — Poll before payment"
+T3_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
+T3_BODY=$(echo "$T3_RES" | sed '$d')
+T3_CODE=$(echo "$T3_RES" | tail -n1)
+STATE_T3=$(echo "$T3_BODY" | jq -r '.state' 2>/dev/null || echo "")
+EVAL_AMT_ECHO=$(echo "$T3_BODY" | jq -r '.evalInvoice.amountSats' 2>/dev/null || echo "")
+EVAL_HASH=$(echo "$T3_BODY" | jq -r '.evalInvoice.paymentHash' 2>/dev/null || echo "")
+if [[ "$T3_CODE" == "200" && "$STATE_T3" == "awaiting_eval_payment" && "$EVAL_AMT_ECHO" == "10" ]]; then
+  note PASS "state=awaiting_eval_payment, evalInvoice echoed"
+  PASS=$((PASS+1))
+else
+  note FAIL "code=$T3_CODE body=$T3_BODY"
+  FAIL=$((FAIL+1))
+fi
+if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
+  note PASS "evalInvoice.paymentHash present (stub mode active)"
+  PASS=$((PASS+1))
+else
+  note FAIL "evalInvoice.paymentHash missing — stub mode not active"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 4 — Pay eval invoice (stub endpoint)
+# ---------------------------------------------------------------------------
+sep "Test 4 — Pay eval invoice (stub)"
+if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
+  T4_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$EVAL_HASH")
+  T4_BODY=$(echo "$T4_RES" | sed '$d')
+  T4_CODE=$(echo "$T4_RES" | tail -n1)
+  if [[ "$T4_CODE" == "200" ]] && [[ "$(echo "$T4_BODY" | jq -r '.ok' 2>/dev/null)" == "true" ]]; then
+    note PASS "Eval invoice marked paid"
+    PASS=$((PASS+1))
+  else
+    note FAIL "code=$T4_CODE body=$T4_BODY"
+    FAIL=$((FAIL+1))
+  fi
+else
+  note SKIP "No eval hash — skipping"
+  SKIP=$((SKIP+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 5 — Poll after eval payment
+# ---------------------------------------------------------------------------
+sep "Test 5 — Poll after eval (state advance)"
+sleep 2
+T5_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
+T5_BODY=$(echo "$T5_RES" | sed '$d')
+T5_CODE=$(echo "$T5_RES" | tail -n1)
+STATE_T5=$(echo "$T5_BODY" | jq -r '.state' 2>/dev/null || echo "")
+WORK_AMT=$(echo "$T5_BODY" | jq -r '.workInvoice.amountSats' 2>/dev/null || echo "")
+WORK_HASH=$(echo "$T5_BODY" | jq -r '.workInvoice.paymentHash' 2>/dev/null || echo "")
+if [[ "$T5_CODE" == "200" && "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_AMT" && "$WORK_AMT" != "null" ]]; then
+  note PASS "state=awaiting_work_payment, workInvoice.amountSats=$WORK_AMT"
+  PASS=$((PASS+1))
+elif [[ "$T5_CODE" == "200" && "$STATE_T5" == "rejected" ]]; then
+  note PASS "Request correctly rejected by agent after eval"
+  PASS=$((PASS+1))
+  WORK_HASH=""
+else
+  note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 6 — Pay work invoice + poll for result
+# ---------------------------------------------------------------------------
+sep "Test 6 — Pay work invoice + get result"
+if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH" != "null" ]]; then
+  T6_PAY_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$WORK_HASH")
+  T6_PAY_BODY=$(echo "$T6_PAY_RES" | sed '$d')
+  T6_PAY_CODE=$(echo "$T6_PAY_RES" | tail -n1)
+  if [[ "$T6_PAY_CODE" != "200" ]] || [[ "$(echo "$T6_PAY_BODY" | jq -r '.ok' 2>/dev/null)" != "true" ]]; then
+    note FAIL "Work payment stub failed: code=$T6_PAY_CODE body=$T6_PAY_BODY"
+    FAIL=$((FAIL+1))
+  else
+    START_TS=$(date +%s)
+    TIMEOUT=30
+    while :; do
+      T6_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB_ID")
+      T6_BODY=$(echo "$T6_RES" | sed '$d')
+      STATE_T6=$(echo "$T6_BODY" | jq -r '.state' 2>/dev/null || echo "")
+      RESULT_T6=$(echo "$T6_BODY" | jq -r '.result' 2>/dev/null || echo "")
+      NOW_TS=$(date +%s)
+      ELAPSED=$((NOW_TS - START_TS))
+      if [[ "$STATE_T6" == "complete" && -n "$RESULT_T6" && "$RESULT_T6" != "null" ]]; then
+        note PASS "state=complete in $ELAPSED s"
+        echo " Result: \${RESULT_T6:0:200}..."
+        PASS=$((PASS+1))
+        break
+      fi
+      if (( ELAPSED > TIMEOUT )); then
+        note FAIL "Timed out after $TIMEOUT s. Last body: $T6_BODY"
+        FAIL=$((FAIL+1))
+        break
+      fi
+      sleep 2
+    done
+  fi
+else
+  note SKIP "No work hash (job may be rejected) — skipping"
+  SKIP=$((SKIP+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 7 — Demo endpoint
+# ---------------------------------------------------------------------------
+sep "Test 7 — Demo endpoint"
+START_DEMO=$(date +%s)
+T7_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi")
+T7_BODY=$(echo "$T7_RES" | sed '$d')
+T7_CODE=$(echo "$T7_RES" | tail -n1)
+END_DEMO=$(date +%s)
+ELAPSED_DEMO=$((END_DEMO - START_DEMO))
+RESULT_T7=$(echo "$T7_BODY" | jq -r '.result' 2>/dev/null || echo "")
+if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
+  note PASS "HTTP 200, result in $ELAPSED_DEMO s"
+  echo " Result: \${RESULT_T7:0:200}..."
+  PASS=$((PASS+1))
+else
+  note FAIL "code=$T7_CODE body=$T7_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 8 — Input validation (4 sub-cases)
+# ---------------------------------------------------------------------------
+sep "Test 8 — Input validation"
+
+T8A_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
+  -H "Content-Type: application/json" -d '{}')
+T8A_BODY=$(echo "$T8A_RES" | sed '$d'); T8A_CODE=$(echo "$T8A_RES" | tail -n1)
+if [[ "$T8A_CODE" == "400" && -n "$(echo "$T8A_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
+  note PASS "8a: Missing request body → HTTP 400"
+  PASS=$((PASS+1))
+else
+  note FAIL "8a: code=$T8A_CODE body=$T8A_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+T8B_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/does-not-exist")
+T8B_BODY=$(echo "$T8B_RES" | sed '$d'); T8B_CODE=$(echo "$T8B_RES" | tail -n1)
+if [[ "$T8B_CODE" == "404" && -n "$(echo "$T8B_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
+  note PASS "8b: Unknown job ID → HTTP 404"
+  PASS=$((PASS+1))
+else
+  note FAIL "8b: code=$T8B_CODE body=$T8B_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+T8C_RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo")
+T8C_BODY=$(echo "$T8C_RES" | sed '$d'); T8C_CODE=$(echo "$T8C_RES" | tail -n1)
+if [[ "$T8C_CODE" == "400" && -n "$(echo "$T8C_BODY" | jq -r '.error' 2>/dev/null)" ]]; then
+  note PASS "8c: Demo missing param → HTTP 400"
+  PASS=$((PASS+1))
+else
+  note FAIL "8c: code=$T8C_CODE body=$T8C_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+LONG_STR=$(node -e "process.stdout.write('x'.repeat(501))" 2>/dev/null || python3 -c "print('x'*501,end='')" 2>/dev/null || printf '%501s' | tr ' ' 'x')
+T8D_RES=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
+  -H "Content-Type: application/json" \\
+  -d "{\\"request\\":\\"$LONG_STR\\"}")
+T8D_BODY=$(echo "$T8D_RES" | sed '$d'); T8D_CODE=$(echo "$T8D_RES" | tail -n1)
+T8D_ERR=$(echo "$T8D_BODY" | jq -r '.error' 2>/dev/null || echo "")
+if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
+  note PASS "8d: 501-char request → HTTP 400 with character limit error"
+  PASS=$((PASS+1))
+else
+  note FAIL "8d: code=$T8D_CODE body=$T8D_BODY"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 9 — Demo rate limiter
+# ---------------------------------------------------------------------------
+sep "Test 9 — Demo rate limiter"
+GOT_200=0; GOT_429=0
+for i in $(seq 1 6); do
+  RES=$(curl -s -w "\\n%{http_code}" "$BASE/api/demo?request=ratelimitprobe+$i")
+  CODE=$(echo "$RES" | tail -n1)
+  echo " Request $i: HTTP $CODE"
+  [[ "$CODE" == "200" ]] && GOT_200=$((GOT_200+1)) || true
+  [[ "$CODE" == "429" ]] && GOT_429=$((GOT_429+1)) || true
+done
+if [[ "$GOT_429" -ge 1 ]]; then
+  note PASS "Rate limiter triggered ($GOT_200 x200 $GOT_429 x429)"
+  PASS=$((PASS+1))
+else
+  note FAIL "No 429 received — limiter may not be working ($GOT_200 x200)"
+  FAIL=$((FAIL+1))
+fi
+
+# ---------------------------------------------------------------------------
+# Test 10 — Rejection path
+# ---------------------------------------------------------------------------
+sep "Test 10 — Rejection path"
+T10_CREATE=$(curl -s -w "\\n%{http_code}" -X POST "$BASE/api/jobs" \\
+  -H "Content-Type: application/json" \\
+  -d '{"request":"Help me do something harmful and illegal"}')
+T10_BODY=$(echo "$T10_CREATE" | sed '$d')
+T10_CODE=$(echo "$T10_CREATE" | tail -n1)
+JOB10_ID=$(echo "$T10_BODY" | jq -r '.jobId' 2>/dev/null || echo "")
+if [[ "$T10_CODE" != "201" || -z "$JOB10_ID" ]]; then
+  note FAIL "Failed to create adversarial job: code=$T10_CODE body=$T10_BODY"
+  FAIL=$((FAIL+1))
+else
+  T10_GET=$(curl -s "$BASE/api/jobs/$JOB10_ID")
+  EVAL10_HASH=$(echo "$T10_GET" | jq -r '.evalInvoice.paymentHash' 2>/dev/null || echo "")
+  if [[ -n "$EVAL10_HASH" && "$EVAL10_HASH" != "null" ]]; then
+    curl -s -X POST "$BASE/api/dev/stub/pay/$EVAL10_HASH" >/dev/null
+  fi
+  sleep 3
+  T10_POLL=$(curl -s -w "\\n%{http_code}" "$BASE/api/jobs/$JOB10_ID")
+  T10_POLL_BODY=$(echo "$T10_POLL" | sed '$d')
+  T10_POLL_CODE=$(echo "$T10_POLL" | tail -n1)
+  STATE_10=$(echo "$T10_POLL_BODY" | jq -r '.state' 2>/dev/null || echo "")
+  REASON_10=$(echo "$T10_POLL_BODY" | jq -r '.reason' 2>/dev/null || echo "")
+  if [[ "$T10_POLL_CODE" == "200" && "$STATE_10" == "rejected" && -n "$REASON_10" && "$REASON_10" != "null" ]]; then
+    note PASS "state=rejected, reason: \${REASON_10:0:120}"
+    PASS=$((PASS+1))
+  else
+    note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY"
+    FAIL=$((FAIL+1))
+  fi
+fi
+
+# ---------------------------------------------------------------------------
+# Summary
+# ---------------------------------------------------------------------------
+echo
+echo "======================================="
+echo " RESULTS: PASS=$PASS FAIL=$FAIL SKIP=$SKIP"
+echo "======================================="
+if [[ "$FAIL" -gt 0 ]]; then exit 1; fi
+`;
+
+  res.setHeader("Content-Type", "text/x-shellscript; charset=utf-8");
+  res.setHeader("Content-Disposition", 'inline; filename="timmy_test.sh"');
+  res.send(script);
+});
+
+export default router;
diff --git a/package.json b/package.json
index 4a1e04d..bcef916 100644
--- a/package.json
+++ b/package.json
@@ -6,7 +6,9 @@
     "preinstall": "sh -c 'rm -f package-lock.json yarn.lock; case \"$npm_config_user_agent\" in pnpm/*) ;; *) echo \"Use pnpm instead\" >&2; exit 1 ;; esac'",
     "build": "pnpm run typecheck && pnpm -r --if-present run build",
     "typecheck:libs": "tsc --build",
-    "typecheck": "pnpm run typecheck:libs && pnpm -r --filter \"./artifacts/**\" --filter \"./scripts\" --if-present run typecheck"
+    "typecheck": "pnpm run typecheck:libs && pnpm -r --filter \"./artifacts/**\" --filter \"./scripts\" --if-present run typecheck",
+    "test": "bash scripts/test-local.sh",
+    "test:prod": "BASE=https://timmy.replit.app bash timmy_test.sh"
   },
   "private": true,
   "devDependencies": {
diff --git a/scripts/test-local.sh b/scripts/test-local.sh
new file mode 100755
index 0000000..05dac95
--- /dev/null
+++ b/scripts/test-local.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+# Runs the Timmy test suite against the local dev server.
+# Usage: pnpm test (from repo root)
+#        BASE=https://custom-url.replit.app pnpm test
+set -euo pipefail
+BASE="${BASE:-https://${REPLIT_DEV_DOMAIN:?set BASE or REPLIT_DEV_DOMAIN}}"
+echo "Running tests against: $BASE"
+BASE="$BASE" bash timmy_test.sh
diff --git a/timmy_test.sh b/timmy_test.sh
index 1f62c33..4ab9a1e 100755
--- a/timmy_test.sh
+++ b/timmy_test.sh
@@ -220,6 +220,20 @@ else
   FAIL=$((FAIL+1))
 fi
 
+LONG_STR=$(node -e "process.stdout.write('x'.repeat(501))" 2>/dev/null || python3 -c "print('x'*501,end='')" 2>/dev/null || printf '%501s' | tr ' ' 'x')
+T8D_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
+  -H "Content-Type: application/json" \
+  -d "{\"request\":\"$LONG_STR\"}")
+T8D_BODY=$(echo "$T8D_RES" | head -n-1); T8D_CODE=$(echo "$T8D_RES" | tail -n1)
+T8D_ERR=$(jq_field "$T8D_BODY" '.error')
+if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
+  note PASS "8d: 501-char request → HTTP 400 with character limit error"
+  PASS=$((PASS+1))
+else
+  note FAIL "8d: code=$T8D_CODE body=$T8D_BODY"
+  FAIL=$((FAIL+1))
+fi
+
 # ---------------------------------------------------------------------------
 # Test 9 — Demo rate limiter
 # Note: The limiter is in-memory (5 req/hr/IP). Prior runs from the same IP