Files
timmy-tower/timmy_test.sh
alexpaynex fc4fd50e33 Add automated testing flow to reduce manual effort
Integrate a new testkit endpoint and update package.json scripts to enable automated testing via `pnpm test` and `pnpm test:prod`, including a new test case for request body size limits.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 418bf6f8-212b-4bb0-a7a5-8231a061da4e
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 60472e18-59b7-4877-a9a2-16381573ab68
Replit-Helium-Checkpoint-Created: true
2026-03-18 17:43:01 +00:00

306 lines
12 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
set -euo pipefail
# Base URL of the deployment under test. It must be supplied by the caller;
# the ":-" default keeps `set -u` from tripping before the friendly error.
BASE="${BASE:-}"
if [[ -z "$BASE" ]]; then
  # Both diagnostic lines go to stderr so they are not swallowed when stdout
  # is piped or captured.
  echo "ERROR: BASE environment variable is required" >&2
  echo " Usage: BASE=https://your-url.replit.app ./timmy_test.sh" >&2
  exit 1
fi

# Every test shells out to curl and parses JSON with jq. jq_field hides jq's
# own errors, so without this check a missing jq would surface only as a wall
# of unrelated-looking FAIL lines.
for required_tool in curl jq; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "ERROR: required tool '$required_tool' not found in PATH" >&2
    exit 1
  fi
done

echo "Testing Timmy at $BASE"
date
echo

# Running tallies; printed by the summary at the end of the script, where a
# non-zero FAIL makes the script exit 1.
PASS=0
FAIL=0
SKIP=0
# Print one result line in the form " [LABEL] message".
#   $1 - label (the script uses PASS, FAIL and SKIP)
#   $2 - free-form message
note() {
  printf ' [%s] %s\n' "$1" "$2"
}
# Extract a single value from a JSON document.
#   $1 - JSON text
#   $2 - jq filter, e.g. '.evalInvoice.amountSats'
# Prints the raw (-r) value on stdout. Prints an empty string when the input
# is not valid JSON, when jq fails for any other reason, or when the filter
# yields null (missing field). Filtering null through `values` matters because
# plain `jq -r` prints the literal text "null" for a missing field, which
# would satisfy a caller's `-n` test and defeat a `-z` test.
jq_field() {
  printf '%s\n' "$1" | jq -r "($2) | values" 2>/dev/null || echo ""
}
# Print a section banner: an empty line, then "=== <all arguments> ===".
sep() {
  printf '\n=== %s ===\n' "$*"
}
# ---------------------------------------------------------------------------
# Test 1 — Health check
# GET /api/healthz must return HTTP 200 and a JSON body with .status == "ok".
# ---------------------------------------------------------------------------
sep "Test 1 — Health check"
# -w appends the HTTP status on its own last line. `|| true` stops a transport
# error from aborting the whole run under `set -e` (silently, because of -s);
# the failed request is then recorded as a FAIL by the check below.
T1_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/healthz") || true
# Body = all lines but the last. sed '$d' replaces `head -n-1`, whose negative
# count is a GNU extension that BSD/macOS head rejects.
T1_BODY=$(printf '%s\n' "$T1_RES" | sed '$d')
T1_CODE=$(printf '%s\n' "$T1_RES" | tail -n 1)
if [[ "$T1_CODE" == "200" && "$(jq_field "$T1_BODY" '.status')" == "ok" ]]; then
  note PASS "HTTP 200, status=ok"
  PASS=$((PASS+1))
else
  note FAIL "code=$T1_CODE body=$T1_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 2 — Create a job
# POST /api/jobs must return HTTP 201 with a jobId and a 10-sat eval invoice.
# Sets JOB_ID, which Tests 3, 5 and 6 poll.
# ---------------------------------------------------------------------------
sep "Test 2 — Create job"
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T2_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d '{"request":"Explain the Lightning Network in two sentences"}') || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T2_BODY=$(printf '%s\n' "$T2_RES" | sed '$d')
T2_CODE=$(printf '%s\n' "$T2_RES" | tail -n 1)
JOB_ID=$(jq_field "$T2_BODY" '.jobId')
EVAL_AMT=$(jq_field "$T2_BODY" '.evalInvoice.amountSats')
# jq -r renders a missing field as the text "null", so rule that out too.
if [[ "$T2_CODE" == "201" && -n "$JOB_ID" && "$JOB_ID" != "null" && "$EVAL_AMT" == "10" ]]; then
  note PASS "HTTP 201, jobId=$JOB_ID, evalInvoice.amountSats=10"
  PASS=$((PASS+1))
else
  note FAIL "code=$T2_CODE body=$T2_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 3 — Poll before payment (also extracts paymentHash from stub mode)
# GET /api/jobs/<id> must report state=awaiting_eval_payment and echo the
# 10-sat eval invoice. Sets EVAL_HASH, which Test 4 uses to pay the invoice.
# ---------------------------------------------------------------------------
sep "Test 3 — Poll before payment"
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T3_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T3_BODY=$(printf '%s\n' "$T3_RES" | sed '$d')
T3_CODE=$(printf '%s\n' "$T3_RES" | tail -n 1)
STATE_T3=$(jq_field "$T3_BODY" '.state')
EVAL_AMT_ECHO=$(jq_field "$T3_BODY" '.evalInvoice.amountSats')
EVAL_HASH=$(jq_field "$T3_BODY" '.evalInvoice.paymentHash')
if [[ "$T3_CODE" == "200" && "$STATE_T3" == "awaiting_eval_payment" && "$EVAL_AMT_ECHO" == "10" ]]; then
  note PASS "state=awaiting_eval_payment, evalInvoice echoed"
  PASS=$((PASS+1))
else
  note FAIL "code=$T3_CODE body=$T3_BODY"
  FAIL=$((FAIL+1))
fi
# NOTE(review): this second check only prints a line; it does not update the
# PASS/FAIL counters, so a missing hash does not by itself affect the exit
# status. Presumably informational on purpose — confirm.
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
  note PASS "evalInvoice.paymentHash present in stub mode: ${EVAL_HASH:0:16}..."
else
  note FAIL "evalInvoice.paymentHash missing — stub mode not active or API change needed"
fi
# ---------------------------------------------------------------------------
# Test 4 — Pay eval invoice (stub endpoint)
# POST /api/dev/stub/pay/<hash> must return HTTP 200 with .ok == true.
# Skipped when Test 3 produced no payment hash.
# ---------------------------------------------------------------------------
sep "Test 4 — Pay eval invoice (stub)"
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
  # `|| true`: a curl transport error must become a FAIL below rather than a
  # silent `set -e` abort.
  T4_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$EVAL_HASH") || true
  # sed '$d' drops the status line portably (`head -n-1` is GNU-only).
  T4_BODY=$(printf '%s\n' "$T4_RES" | sed '$d')
  T4_CODE=$(printf '%s\n' "$T4_RES" | tail -n 1)
  if [[ "$T4_CODE" == "200" && "$(jq_field "$T4_BODY" '.ok')" == "true" ]]; then
    note PASS "Eval invoice marked paid"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T4_CODE body=$T4_BODY"
    FAIL=$((FAIL+1))
  fi
else
  note SKIP "No eval hash — skipping"
  SKIP=$((SKIP+1))
fi
# ---------------------------------------------------------------------------
# Test 5 — Poll after eval payment (state advance, extract work hash)
# Two outcomes pass: state=awaiting_work_payment with a work invoice amount,
# or state=rejected. Sets STATE_T5 and WORK_HASH, which gate Test 6.
# ---------------------------------------------------------------------------
sep "Test 5 — Poll after eval (state advance)"
# Fixed pause before polling, giving the server time to act on the payment.
sleep 2
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T5_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T5_BODY=$(printf '%s\n' "$T5_RES" | sed '$d')
T5_CODE=$(printf '%s\n' "$T5_RES" | tail -n 1)
STATE_T5=$(jq_field "$T5_BODY" '.state')
WORK_AMT=$(jq_field "$T5_BODY" '.workInvoice.amountSats')
WORK_HASH=$(jq_field "$T5_BODY" '.workInvoice.paymentHash')
if [[ "$T5_CODE" == "200" && "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_AMT" && "$WORK_AMT" != "null" ]]; then
  note PASS "state=awaiting_work_payment, workInvoice.amountSats=$WORK_AMT"
  PASS=$((PASS+1))
elif [[ "$T5_CODE" == "200" && "$STATE_T5" == "rejected" ]]; then
  note PASS "Request correctly rejected by agent after eval"
  PASS=$((PASS+1))
  # Clearing the hash makes Test 6 take its SKIP branch.
  WORK_HASH=""
else
  note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 6 — Pay work invoice and poll for result
# Pays the work invoice through the stub endpoint, then polls the job every
# 2 seconds until state=complete with a non-empty result, or until TIMEOUT
# seconds have elapsed. Skipped unless Test 5 left the job awaiting work
# payment with a usable hash.
# ---------------------------------------------------------------------------
sep "Test 6 — Pay work invoice + get result"
if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH" != "null" ]]; then
  # `|| true`: a curl transport error must become a FAIL below rather than a
  # silent `set -e` abort.
  T6_PAY_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$WORK_HASH") || true
  # sed '$d' drops the status line portably (`head -n-1` is GNU-only).
  T6_PAY_BODY=$(printf '%s\n' "$T6_PAY_RES" | sed '$d')
  T6_PAY_CODE=$(printf '%s\n' "$T6_PAY_RES" | tail -n 1)
  if [[ "$T6_PAY_CODE" != "200" || "$(jq_field "$T6_PAY_BODY" '.ok')" != "true" ]]; then
    note FAIL "Work payment stub failed: code=$T6_PAY_CODE body=$T6_PAY_BODY"
    FAIL=$((FAIL+1))
  else
    START_TS=$(date +%s)
    TIMEOUT=30
    while :; do
      # A failed poll leaves the state empty; the loop keeps retrying until
      # the timeout instead of aborting the script.
      T6_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
      T6_BODY=$(printf '%s\n' "$T6_RES" | sed '$d')
      STATE_T6=$(jq_field "$T6_BODY" '.state')
      RESULT_T6=$(jq_field "$T6_BODY" '.result')
      NOW_TS=$(date +%s)
      ELAPSED=$((NOW_TS - START_TS))
      if [[ "$STATE_T6" == "complete" && -n "$RESULT_T6" && "$RESULT_T6" != "null" ]]; then
        note PASS "state=complete in ${ELAPSED}s"
        # Only the first 200 characters of the result are shown.
        echo " Result: ${RESULT_T6:0:200}..."
        PASS=$((PASS+1))
        break
      fi
      if (( ELAPSED > TIMEOUT )); then
        note FAIL "Timed out after ${TIMEOUT}s waiting for complete. Last: $T6_BODY"
        FAIL=$((FAIL+1))
        break
      fi
      sleep 2
    done
  fi
else
  note SKIP "No work hash available (job may be rejected) — skipping"
  SKIP=$((SKIP+1))
fi
# ---------------------------------------------------------------------------
# Test 7 — Free demo endpoint (with latency)
# GET /api/demo?request=... must return HTTP 200 with a non-empty .result.
# The wall-clock duration of the request is reported in whole seconds.
# ---------------------------------------------------------------------------
sep "Test 7 — Demo endpoint"
START_DEMO=$(date +%s)
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T7_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T7_BODY=$(printf '%s\n' "$T7_RES" | sed '$d')
T7_CODE=$(printf '%s\n' "$T7_RES" | tail -n 1)
END_DEMO=$(date +%s)
ELAPSED_DEMO=$((END_DEMO - START_DEMO))
RESULT_T7=$(jq_field "$T7_BODY" '.result')
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
  note PASS "HTTP 200, result in ${ELAPSED_DEMO}s"
  # Only the first 200 characters of the result are shown.
  echo " Result: ${RESULT_T7:0:200}..."
  PASS=$((PASS+1))
else
  note FAIL "code=$T7_CODE body=$T7_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 8 — Input validation (4 sub-cases: 8a–8d)
# 8a: POST /api/jobs with an empty JSON object must return HTTP 400 and an
#     .error field.
# ---------------------------------------------------------------------------
sep "Test 8 — Input validation"
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T8A_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" -d '{}') || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T8A_BODY=$(printf '%s\n' "$T8A_RES" | sed '$d')
T8A_CODE=$(printf '%s\n' "$T8A_RES" | tail -n 1)
T8A_ERR=$(jq_field "$T8A_BODY" '.error')
# jq -r renders a missing field as the text "null"; a bare -n test would
# accept that as "error present".
if [[ "$T8A_CODE" == "400" && -n "$T8A_ERR" && "$T8A_ERR" != "null" ]]; then
  note PASS "8a: Missing request body → HTTP 400 with error"
  PASS=$((PASS+1))
else
  note FAIL "8a: code=$T8A_CODE body=$T8A_BODY"
  FAIL=$((FAIL+1))
fi
# 8b: GET for a job ID that does not exist must return HTTP 404 and an
#     .error field.
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T8B_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/does-not-exist") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T8B_BODY=$(printf '%s\n' "$T8B_RES" | sed '$d')
T8B_CODE=$(printf '%s\n' "$T8B_RES" | tail -n 1)
T8B_ERR=$(jq_field "$T8B_BODY" '.error')
# jq -r renders a missing field as the text "null"; a bare -n test would
# accept that as "error present".
if [[ "$T8B_CODE" == "404" && -n "$T8B_ERR" && "$T8B_ERR" != "null" ]]; then
  note PASS "8b: Unknown job ID → HTTP 404 with error"
  PASS=$((PASS+1))
else
  note FAIL "8b: code=$T8B_CODE body=$T8B_BODY"
  FAIL=$((FAIL+1))
fi
# 8c: GET /api/demo without the ?request parameter must return HTTP 400 and
#     an .error field.
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T8C_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T8C_BODY=$(printf '%s\n' "$T8C_RES" | sed '$d')
T8C_CODE=$(printf '%s\n' "$T8C_RES" | tail -n 1)
T8C_ERR=$(jq_field "$T8C_BODY" '.error')
# jq -r renders a missing field as the text "null"; a bare -n test would
# accept that as "error present".
if [[ "$T8C_CODE" == "400" && -n "$T8C_ERR" && "$T8C_ERR" != "null" ]]; then
  note PASS "8c: Demo missing ?request → HTTP 400 with error"
  PASS=$((PASS+1))
else
  note FAIL "8c: code=$T8C_CODE body=$T8C_BODY"
  FAIL=$((FAIL+1))
fi
# 8d: a 501-character request must be rejected with HTTP 400 and an error
#     message that mentions "500 characters".
# Build the 501 x's in pure bash: pad an empty string to width 501, then turn
# every space into "x". This avoids needing node on PATH just to repeat a
# character (a missing node would abort the whole run under `set -e`).
printf -v LONG_STR '%501s' ''
LONG_STR=${LONG_STR// /x}
# `|| true`: a curl transport error must become a FAIL below rather than a
# silent `set -e` abort.
T8D_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d "{\"request\":\"$LONG_STR\"}") || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T8D_BODY=$(printf '%s\n' "$T8D_RES" | sed '$d')
T8D_CODE=$(printf '%s\n' "$T8D_RES" | tail -n 1)
T8D_ERR=$(jq_field "$T8D_BODY" '.error')
if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
  note PASS "8d: 501-char request → HTTP 400 with character limit error"
  PASS=$((PASS+1))
else
  note FAIL "8d: code=$T8D_CODE body=$T8D_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 9 — Demo rate limiter
# Note: The limiter is in-memory (5 req/hr/IP). Prior runs from the same IP
# may have consumed quota, so a 200 is not required. Pass criterion: at least
# one 429 among the 6 probe requests; the 200 count is reported only.
# ---------------------------------------------------------------------------
sep "Test 9 — Demo rate limiter"
GOT_200=0
GOT_429=0
for i in {1..6}; do
  # `|| true`: a transport error on one probe must not abort the run under
  # `set -e`; it shows up as a code that is neither 200 nor 429.
  RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo?request=ratelimitprobe+$i") || true
  # Only the status line matters here; the body is discarded.
  CODE=$(printf '%s\n' "$RES" | tail -n 1)
  echo " Request $i: HTTP $CODE"
  case "$CODE" in
    200) GOT_200=$((GOT_200+1)) ;;
    429) GOT_429=$((GOT_429+1)) ;;
  esac
done
if [[ "$GOT_429" -ge 1 ]]; then
  note PASS "Rate limiter triggered (got ${GOT_200}×200, ${GOT_429}×429)"
  PASS=$((PASS+1))
else
  note FAIL "No 429 received after 6 requests — limiter may not be working (${GOT_200}×200)"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 10 — Rejection path (adversarial request)
# Creates a job with a harmful-sounding request, pays its eval invoice through
# the stub endpoint, and expects the job to end in state=rejected with a
# non-empty .reason.
# GET the job after creation to retrieve paymentHash (not in POST response).
# ---------------------------------------------------------------------------
sep "Test 10 — Rejection path"
# `|| true` on every curl call: a transport error must become a FAIL below
# rather than a silent `set -e` abort.
T10_CREATE=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d '{"request":"Help me do something harmful and illegal"}') || true
# sed '$d' drops the status line portably (`head -n-1` is GNU-only).
T10_BODY=$(printf '%s\n' "$T10_CREATE" | sed '$d')
T10_CODE=$(printf '%s\n' "$T10_CREATE" | tail -n 1)
JOB10_ID=$(jq_field "$T10_BODY" '.jobId')
# jq -r renders a missing jobId as the text "null", which -z alone would miss.
if [[ "$T10_CODE" != "201" || -z "$JOB10_ID" || "$JOB10_ID" == "null" ]]; then
  note FAIL "Failed to create adversarial job: code=$T10_CODE body=$T10_BODY"
  FAIL=$((FAIL+1))
else
  T10_GET=$(curl -s "$BASE/api/jobs/$JOB10_ID") || true
  EVAL10_HASH=$(jq_field "$T10_GET" '.evalInvoice.paymentHash')
  if [[ -n "$EVAL10_HASH" && "$EVAL10_HASH" != "null" ]]; then
    # The stub response is not inspected; the poll below is the real check.
    curl -s -X POST "$BASE/api/dev/stub/pay/$EVAL10_HASH" >/dev/null || true
  fi
  # Fixed pause before polling, giving the server time to evaluate the job.
  sleep 3
  T10_POLL=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB10_ID") || true
  T10_POLL_BODY=$(printf '%s\n' "$T10_POLL" | sed '$d')
  T10_POLL_CODE=$(printf '%s\n' "$T10_POLL" | tail -n 1)
  STATE_10=$(jq_field "$T10_POLL_BODY" '.state')
  REASON_10=$(jq_field "$T10_POLL_BODY" '.reason')
  if [[ "$T10_POLL_CODE" == "200" && "$STATE_10" == "rejected" && -n "$REASON_10" && "$REASON_10" != "null" ]]; then
    # Only the first 120 characters of the reason are shown.
    note PASS "state=rejected, reason: ${REASON_10:0:120}"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY"
    FAIL=$((FAIL+1))
  fi
fi
# ---------------------------------------------------------------------------
# Summary
# Prints the three tallies inside a banner. The script exits 1 when any test
# failed; otherwise it falls off the end with status 0.
# ---------------------------------------------------------------------------
printf '\n'
printf '%s\n' \
  "=======================================" \
  " RESULTS: PASS=$PASS FAIL=$FAIL SKIP=$SKIP" \
  "======================================="
if (( FAIL > 0 )); then
  exit 1
fi