Files
timmy-tower/timmy_test.sh

430 lines
17 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# End-to-end smoke test for the Timmy HTTP API.
#
# Usage: BASE=https://your-url.replit.app ./timmy_test.sh
#
# Required environment:
#   BASE  Root URL of the deployment under test (a trailing "/" is tolerated).
#
# Required tools: curl, jq.
set -euo pipefail

BASE="${BASE:-}"
if [[ -z "$BASE" ]]; then
  # Both diagnostic lines go to stderr so stdout carries only test output.
  echo "ERROR: BASE environment variable is required" >&2
  echo " Usage: BASE=https://your-url.replit.app ./timmy_test.sh" >&2
  exit 1
fi
# Drop one trailing slash so "$BASE/api/..." never contains "//".
BASE="${BASE%/}"

# Every test shells out to curl and jq; fail fast with a clear message rather
# than reporting a wall of confusing test failures.
for tool in curl jq; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    echo "ERROR: required tool '$tool' not found in PATH" >&2
    exit 1
  fi
done

echo "Testing Timmy at $BASE"
echo "$(date)"
echo

# Running tallies, updated by each test and printed in the final summary.
PASS=0
FAIL=0
SKIP=0
# note LABEL MESSAGE
# Print one status line of the form " [LABEL] MESSAGE" to stdout.
note() {
  printf ' [%s] %s\n' "$1" "$2"
}
# jq_field JSON FILTER
# Print, in raw form, the value that the jq FILTER selects from the JSON text.
# Prints an empty string when jq fails (e.g. the text is not valid JSON) or
# when the selected value is JSON null / the field is absent. Filtering out
# null means a plain `-n` test on the result correctly detects a missing field
# instead of seeing jq's literal "null".
jq_field() {
  # Here-string instead of `echo` so input beginning with "-" is passed verbatim.
  jq -r "($2) | select(. != null)" <<<"$1" 2>/dev/null || echo ""
}
# sep TITLE...
# Print a blank line followed by a "=== TITLE ===" section banner; all
# arguments are joined into the title.
sep() {
  printf '\n=== %s ===\n' "$*"
}
# ---------------------------------------------------------------------------
# Test 1 — Health check
# GET /api/healthz must answer HTTP 200 with a JSON body whose .status is "ok".
# ---------------------------------------------------------------------------
sep "Test 1 — Health check"
# `|| true`: under `set -e` a curl transport failure would otherwise abort the
# whole script without any output; letting it through reports a FAIL instead.
T1_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/healthz") || true
# -w appends the status code as the final line: everything before it is the
# body. `sed '$d'` is used because `head -n-1` is GNU-only.
T1_BODY=$(sed '$d' <<<"$T1_RES")
T1_CODE="${T1_RES##*$'\n'}"
if [[ "$T1_CODE" == "200" && "$(jq_field "$T1_BODY" '.status')" == "ok" ]]; then
  note PASS "HTTP 200, status=ok"
  PASS=$((PASS+1))
else
  note FAIL "code=$T1_CODE body=$T1_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 2 — Create a job
# POST /api/jobs must answer HTTP 201 with a jobId and an eval invoice of
# 10 sats. Sets JOB_ID, which the later job-flow tests poll.
# ---------------------------------------------------------------------------
sep "Test 2 — Create job"
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T2_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d '{"request":"Explain the Lightning Network in two sentences"}') || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T2_BODY=$(sed '$d' <<<"$T2_RES")
T2_CODE="${T2_RES##*$'\n'}"
JOB_ID=$(jq_field "$T2_BODY" '.jobId')
EVAL_AMT=$(jq_field "$T2_BODY" '.evalInvoice.amountSats')
# jq prints the literal "null" for a missing field, so check for it explicitly.
if [[ "$T2_CODE" == "201" && -n "$JOB_ID" && "$JOB_ID" != "null" && "$EVAL_AMT" == "10" ]]; then
  note PASS "HTTP 201, jobId=$JOB_ID, evalInvoice.amountSats=10"
  PASS=$((PASS+1))
else
  note FAIL "code=$T2_CODE body=$T2_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 3 — Poll before payment (also extracts paymentHash from stub mode)
# GET /api/jobs/$JOB_ID must answer HTTP 200 with state=awaiting_eval_payment
# and echo the 10-sat eval invoice. Sets EVAL_HASH for Test 4.
# ---------------------------------------------------------------------------
sep "Test 3 — Poll before payment"
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T3_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T3_BODY=$(sed '$d' <<<"$T3_RES")
T3_CODE="${T3_RES##*$'\n'}"
STATE_T3=$(jq_field "$T3_BODY" '.state')
EVAL_AMT_ECHO=$(jq_field "$T3_BODY" '.evalInvoice.amountSats')
EVAL_HASH=$(jq_field "$T3_BODY" '.evalInvoice.paymentHash')
if [[ "$T3_CODE" == "200" && "$STATE_T3" == "awaiting_eval_payment" && "$EVAL_AMT_ECHO" == "10" ]]; then
  note PASS "state=awaiting_eval_payment, evalInvoice echoed"
  PASS=$((PASS+1))
else
  note FAIL "code=$T3_CODE body=$T3_BODY"
  FAIL=$((FAIL+1))
fi
# Secondary report on the payment hash. It only prints a line; it does not
# touch the PASS/FAIL counters. A missing hash makes Test 4 SKIP.
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
  note PASS "evalInvoice.paymentHash present in stub mode: ${EVAL_HASH:0:16}..."
else
  note FAIL "evalInvoice.paymentHash missing — stub mode not active or API change needed"
fi
# ---------------------------------------------------------------------------
# Test 4 — Pay eval invoice (stub endpoint)
# POST /api/dev/stub/pay/$EVAL_HASH must answer HTTP 200 with .ok == true.
# Skipped when Test 3 did not yield a payment hash.
# ---------------------------------------------------------------------------
sep "Test 4 — Pay eval invoice (stub)"
if [[ -n "$EVAL_HASH" && "$EVAL_HASH" != "null" ]]; then
  # `|| true` keeps a curl transport failure from aborting the script under
  # `set -e`; it is reported as a FAIL below instead.
  T4_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$EVAL_HASH") || true
  # Last line is the status code from -w; the rest is the body
  # (`sed '$d'` rather than the GNU-only `head -n-1`).
  T4_BODY=$(sed '$d' <<<"$T4_RES")
  T4_CODE="${T4_RES##*$'\n'}"
  if [[ "$T4_CODE" == "200" && "$(jq_field "$T4_BODY" '.ok')" == "true" ]]; then
    note PASS "Eval invoice marked paid"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T4_CODE body=$T4_BODY"
    FAIL=$((FAIL+1))
  fi
else
  note SKIP "No eval hash — skipping"
  SKIP=$((SKIP+1))
fi
# ---------------------------------------------------------------------------
# Test 5 — Poll after eval payment (state advance, extract work hash)
# Passes when the job is in awaiting_work_payment with a work invoice amount,
# or when it is in the rejected state. Sets STATE_T5 and WORK_HASH for Test 6.
# ---------------------------------------------------------------------------
sep "Test 5 — Poll after eval (state advance)"
# Fixed pause before polling for the new state.
sleep 2
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T5_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T5_BODY=$(sed '$d' <<<"$T5_RES")
T5_CODE="${T5_RES##*$'\n'}"
STATE_T5=$(jq_field "$T5_BODY" '.state')
WORK_AMT=$(jq_field "$T5_BODY" '.workInvoice.amountSats')
WORK_HASH=$(jq_field "$T5_BODY" '.workInvoice.paymentHash')
if [[ "$T5_CODE" == "200" && "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_AMT" && "$WORK_AMT" != "null" ]]; then
  note PASS "state=awaiting_work_payment, workInvoice.amountSats=$WORK_AMT"
  PASS=$((PASS+1))
elif [[ "$T5_CODE" == "200" && "$STATE_T5" == "rejected" ]]; then
  note PASS "Request correctly rejected by agent after eval"
  PASS=$((PASS+1))
  # Nothing to pay for a rejected job; clearing the hash makes Test 6 skip.
  WORK_HASH=""
else
  note FAIL "code=$T5_CODE state=$STATE_T5 body=$T5_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 6 — Pay work invoice and poll for result
# Pays the work invoice through the stub endpoint, then polls the job every
# 2 seconds until state=complete with a non-null result, failing once more
# than TIMEOUT seconds have elapsed. Skipped when Test 5 left no work hash.
# ---------------------------------------------------------------------------
sep "Test 6 — Pay work invoice + get result"
if [[ "$STATE_T5" == "awaiting_work_payment" && -n "$WORK_HASH" && "$WORK_HASH" != "null" ]]; then
  # `|| true` keeps a curl transport failure from aborting the script under
  # `set -e`; it is reported as a FAIL below instead.
  T6_PAY_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/dev/stub/pay/$WORK_HASH") || true
  # Last line is the status code from -w; the rest is the body
  # (`sed '$d'` rather than the GNU-only `head -n-1`).
  T6_PAY_BODY=$(sed '$d' <<<"$T6_PAY_RES")
  T6_PAY_CODE="${T6_PAY_RES##*$'\n'}"
  if [[ "$T6_PAY_CODE" != "200" || "$(jq_field "$T6_PAY_BODY" '.ok')" != "true" ]]; then
    note FAIL "Work payment stub failed: code=$T6_PAY_CODE body=$T6_PAY_BODY"
    FAIL=$((FAIL+1))
  else
    START_TS=$(date +%s)
    TIMEOUT=30
    while :; do
      # A failed poll is treated like "not complete yet" and retried until
      # the timeout, rather than aborting the script.
      T6_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB_ID") || true
      T6_BODY=$(sed '$d' <<<"$T6_RES")
      STATE_T6=$(jq_field "$T6_BODY" '.state')
      RESULT_T6=$(jq_field "$T6_BODY" '.result')
      NOW_TS=$(date +%s)
      ELAPSED=$((NOW_TS - START_TS))
      if [[ "$STATE_T6" == "complete" && -n "$RESULT_T6" && "$RESULT_T6" != "null" ]]; then
        note PASS "state=complete in ${ELAPSED}s"
        # Show at most the first 200 characters of the result.
        echo " Result: ${RESULT_T6:0:200}..."
        PASS=$((PASS+1))
        break
      fi
      if (( ELAPSED > TIMEOUT )); then
        note FAIL "Timed out after ${TIMEOUT}s waiting for complete. Last: $T6_BODY"
        FAIL=$((FAIL+1))
        break
      fi
      sleep 2
    done
  fi
else
  note SKIP "No work hash available (job may be rejected) — skipping"
  SKIP=$((SKIP+1))
fi
# ---------------------------------------------------------------------------
# Test 7 — Free demo endpoint (with latency)
# GET /api/demo?request=... must answer HTTP 200 with a non-null .result.
# HTTP 429 is counted as SKIP rather than FAIL.
# ---------------------------------------------------------------------------
sep "Test 7 — Demo endpoint"
START_DEMO=$(date +%s)
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T7_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo?request=What+is+a+satoshi") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T7_BODY=$(sed '$d' <<<"$T7_RES")
T7_CODE="${T7_RES##*$'\n'}"
END_DEMO=$(date +%s)
# Whole-second wall-clock latency of the request.
ELAPSED_DEMO=$((END_DEMO - START_DEMO))
RESULT_T7=$(jq_field "$T7_BODY" '.result')
if [[ "$T7_CODE" == "200" && -n "$RESULT_T7" && "$RESULT_T7" != "null" ]]; then
  note PASS "HTTP 200, result in ${ELAPSED_DEMO}s"
  # Show at most the first 200 characters of the result.
  echo " Result: ${RESULT_T7:0:200}..."
  PASS=$((PASS+1))
elif [[ "$T7_CODE" == "429" ]]; then
  note SKIP "Rate limiter quota exhausted from prior runs — restart server to reset (tested independently in Test 9)"
  SKIP=$((SKIP+1))
else
  note FAIL "code=$T7_CODE body=$T7_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 8 — Input validation (4 sub-cases: 8a–8d)
# ---------------------------------------------------------------------------
sep "Test 8 — Input validation"
# 8a: POST /api/jobs with an empty JSON object must answer HTTP 400 with an
# .error field.
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T8A_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" -d '{}') || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T8A_BODY=$(sed '$d' <<<"$T8A_RES")
T8A_CODE="${T8A_RES##*$'\n'}"
T8A_ERR=$(jq_field "$T8A_BODY" '.error')
# jq prints the literal "null" for a missing field; without the explicit
# check a response lacking .error would still pass.
if [[ "$T8A_CODE" == "400" && -n "$T8A_ERR" && "$T8A_ERR" != "null" ]]; then
  note PASS "8a: Missing request body → HTTP 400 with error"
  PASS=$((PASS+1))
else
  note FAIL "8a: code=$T8A_CODE body=$T8A_BODY"
  FAIL=$((FAIL+1))
fi
# 8b: GET of a job ID that does not exist must answer HTTP 404 with an
# .error field.
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T8B_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/does-not-exist") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T8B_BODY=$(sed '$d' <<<"$T8B_RES")
T8B_CODE="${T8B_RES##*$'\n'}"
T8B_ERR=$(jq_field "$T8B_BODY" '.error')
# jq prints the literal "null" for a missing field; without the explicit
# check a response lacking .error would still pass.
if [[ "$T8B_CODE" == "404" && -n "$T8B_ERR" && "$T8B_ERR" != "null" ]]; then
  note PASS "8b: Unknown job ID → HTTP 404 with error"
  PASS=$((PASS+1))
else
  note FAIL "8b: code=$T8B_CODE body=$T8B_BODY"
  FAIL=$((FAIL+1))
fi
# 8c: GET /api/demo without the ?request parameter must answer HTTP 400 with
# an .error field. HTTP 429 is counted as SKIP rather than FAIL.
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T8C_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T8C_BODY=$(sed '$d' <<<"$T8C_RES")
T8C_CODE="${T8C_RES##*$'\n'}"
T8C_ERR=$(jq_field "$T8C_BODY" '.error')
# jq prints the literal "null" for a missing field; without the explicit
# check a response lacking .error would still pass.
if [[ "$T8C_CODE" == "400" && -n "$T8C_ERR" && "$T8C_ERR" != "null" ]]; then
  note PASS "8c: Demo missing ?request → HTTP 400 with error"
  PASS=$((PASS+1))
elif [[ "$T8C_CODE" == "429" ]]; then
  note SKIP "8c: Rate limiter quota exhausted — restart server to reset"
  SKIP=$((SKIP+1))
else
  note FAIL "8c: code=$T8C_CODE body=$T8C_BODY"
  FAIL=$((FAIL+1))
fi
# 8d: a 501-character request must answer HTTP 400 with an .error message
# that mentions "500 characters".
# Build the string with the printf builtin: the format is re-applied once per
# argument and "%.0s" prints nothing of the argument itself, giving exactly
# 501 "x" characters without depending on node being installed.
LONG_STR=$(printf 'x%.0s' {1..501})
# `|| true` keeps a curl transport failure from aborting the script under
# `set -e`; it is reported as a FAIL below instead.
T8D_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d "{\"request\":\"$LONG_STR\"}") || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T8D_BODY=$(sed '$d' <<<"$T8D_RES")
T8D_CODE="${T8D_RES##*$'\n'}"
T8D_ERR=$(jq_field "$T8D_BODY" '.error')
if [[ "$T8D_CODE" == "400" && "$T8D_ERR" == *"500 characters"* ]]; then
  note PASS "8d: 501-char request → HTTP 400 with character limit error"
  PASS=$((PASS+1))
else
  note FAIL "8d: code=$T8D_CODE body=$T8D_BODY"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 9 — Demo rate limiter
# Note: The limiter is in-memory (5 req/hr/IP). Prior runs from the same IP
# may have consumed quota, so no 200 is required.
# Pass criterion: at least one 429 among six requests. The number of 200s is
# reported for information only.
# ---------------------------------------------------------------------------
sep "Test 9 — Demo rate limiter"
GOT_200=0
GOT_429=0
for i in {1..6}; do
  # `|| true` keeps a curl transport failure from aborting the script under
  # `set -e`; such a request simply counts as neither 200 nor 429.
  RES=$(curl -s -w "\n%{http_code}" "$BASE/api/demo?request=ratelimitprobe+$i") || true
  # -w appends the status code as the final line of the output.
  CODE="${RES##*$'\n'}"
  echo " Request $i: HTTP $CODE"
  case "$CODE" in
    200) GOT_200=$((GOT_200 + 1)) ;;
    429) GOT_429=$((GOT_429 + 1)) ;;
  esac
done
if (( GOT_429 >= 1 )); then
  note PASS "Rate limiter triggered (got ${GOT_200}×200, ${GOT_429}×429)"
  PASS=$((PASS+1))
else
  note FAIL "No 429 received after 6 requests — limiter may not be working (${GOT_200}×200)"
  FAIL=$((FAIL+1))
fi
# ---------------------------------------------------------------------------
# Test 10 — Rejection path (adversarial request)
# GET the job after creation to retrieve paymentHash (not in POST response).
# Flow: create the job, pay its eval invoice through the stub endpoint, wait,
# then expect state=rejected together with a non-null .reason.
# ---------------------------------------------------------------------------
sep "Test 10 — Rejection path"
# `|| true` on each curl keeps a transport failure from aborting the script
# under `set -e`; it surfaces as a FAIL below instead.
T10_CREATE=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/jobs" \
  -H "Content-Type: application/json" \
  -d '{"request":"Help me do something harmful and illegal"}') || true
# Last line is the status code from -w; the rest is the body
# (`sed '$d'` rather than the GNU-only `head -n-1`).
T10_BODY=$(sed '$d' <<<"$T10_CREATE")
T10_CODE="${T10_CREATE##*$'\n'}"
JOB10_ID=$(jq_field "$T10_BODY" '.jobId')
# jq prints the literal "null" for a missing field, so treat it as absent.
if [[ "$T10_CODE" != "201" || -z "$JOB10_ID" || "$JOB10_ID" == "null" ]]; then
  note FAIL "Failed to create adversarial job: code=$T10_CODE body=$T10_BODY"
  FAIL=$((FAIL+1))
else
  T10_GET=$(curl -s "$BASE/api/jobs/$JOB10_ID") || true
  EVAL10_HASH=$(jq_field "$T10_GET" '.evalInvoice.paymentHash')
  if [[ -n "$EVAL10_HASH" && "$EVAL10_HASH" != "null" ]]; then
    # The response is discarded; the outcome is judged by the poll below.
    curl -s -X POST "$BASE/api/dev/stub/pay/$EVAL10_HASH" >/dev/null || true
  fi
  # Fixed pause before polling for the final state.
  sleep 3
  T10_POLL=$(curl -s -w "\n%{http_code}" "$BASE/api/jobs/$JOB10_ID") || true
  T10_POLL_BODY=$(sed '$d' <<<"$T10_POLL")
  T10_POLL_CODE="${T10_POLL##*$'\n'}"
  STATE_10=$(jq_field "$T10_POLL_BODY" '.state')
  REASON_10=$(jq_field "$T10_POLL_BODY" '.reason')
  if [[ "$T10_POLL_CODE" == "200" && "$STATE_10" == "rejected" && -n "$REASON_10" && "$REASON_10" != "null" ]]; then
    # Show at most the first 120 characters of the reason.
    note PASS "state=rejected, reason: ${REASON_10:0:120}"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T10_POLL_CODE state=$STATE_10 body=$T10_POLL_BODY"
    FAIL=$((FAIL+1))
  fi
fi
# ---------------------------------------------------------------------------
# Tests 11-16 — Mode 2: Session endpoints (v2, not yet implemented)
# These tests SKIP until the session endpoints are built.
# A probe POST to /api/sessions decides: HTTP 404, or 000 (no HTTP response),
# marks all six tests as SKIP; any other status runs them.
# NOTE(review): when the endpoint exists the probe is itself a create-session
# request, so it presumably leaves an extra session behind — confirm that is
# acceptable.
# ---------------------------------------------------------------------------
sep "Tests 11-16 — Session mode (v2 — endpoints not yet built)"
# `|| true` on each curl keeps a transport failure from aborting the script
# under `set -e`; without it the "000" branch below could never be reached.
SESSION_ENDPOINT_RES=$(curl -s -o /dev/null -w "%{http_code}" -X POST "$BASE/api/sessions" \
  -H "Content-Type: application/json" -d '{"amount_sats":500}') || true
if [[ "$SESSION_ENDPOINT_RES" == "404" || "$SESSION_ENDPOINT_RES" == "000" ]]; then
  for TNUM in 11 12 13 14 15 16; do
    note SKIP "Test $TNUM — session endpoint not yet implemented"
    SKIP=$((SKIP+1))
  done
else
  # Test 11 — Create session
  # Expects HTTP 201, a sessionId and state=awaiting_payment.
  sep "Test 11 — Create session"
  T11_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/sessions" \
    -H "Content-Type: application/json" -d '{"amount_sats":500}') || true
  # Last line is the status code from -w; the rest is the body
  # (`sed '$d'` rather than the GNU-only `head -n-1`).
  T11_BODY=$(sed '$d' <<<"$T11_RES")
  T11_CODE="${T11_RES##*$'\n'}"
  SESSION_ID=$(jq_field "$T11_BODY" '.sessionId')
  SESSION_INV_HASH=$(jq_field "$T11_BODY" '.invoice.paymentHash')
  # jq prints the literal "null" for a missing field, so check for it explicitly.
  if [[ "$T11_CODE" == "201" && -n "$SESSION_ID" && "$SESSION_ID" != "null" \
    && "$(jq_field "$T11_BODY" '.state')" == "awaiting_payment" ]]; then
    note PASS "HTTP 201, sessionId=$SESSION_ID, state=awaiting_payment"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T11_CODE body=$T11_BODY"
    FAIL=$((FAIL+1))
  fi

  # Test 12 — Pay session invoice and activate
  # Expects state=active with balance equal to the 500 sats requested.
  sep "Test 12 — Pay session invoice + activate"
  if [[ -n "$SESSION_INV_HASH" && "$SESSION_INV_HASH" != "null" ]]; then
    # The response is discarded; the outcome is judged by the poll below.
    curl -s -X POST "$BASE/api/dev/stub/pay/$SESSION_INV_HASH" >/dev/null || true
    sleep 2
    T12_RES=$(curl -s -w "\n%{http_code}" "$BASE/api/sessions/$SESSION_ID") || true
    T12_BODY=$(sed '$d' <<<"$T12_RES")
    T12_CODE="${T12_RES##*$'\n'}"
    T12_STATE=$(jq_field "$T12_BODY" '.state')
    T12_BAL=$(jq_field "$T12_BODY" '.balance')
    if [[ "$T12_CODE" == "200" && "$T12_STATE" == "active" && "$T12_BAL" == "500" ]]; then
      note PASS "state=active, balance=500"
      PASS=$((PASS+1))
    else
      note FAIL "code=$T12_CODE state=$T12_STATE balance=$T12_BAL"
      FAIL=$((FAIL+1))
    fi
  else
    note SKIP "No session invoice hash — skipping Test 12"
    SKIP=$((SKIP+1))
  fi

  # Test 13 — Submit request against session
  # Expects HTTP 200, state=complete, a result, and a positive integer cost.
  sep "Test 13 — Submit request against session"
  T13_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/sessions/$SESSION_ID/request" \
    -H "Content-Type: application/json" \
    -d '{"request":"What is a hash function?"}') || true
  T13_BODY=$(sed '$d' <<<"$T13_RES")
  T13_CODE="${T13_RES##*$'\n'}"
  T13_STATE=$(jq_field "$T13_BODY" '.state')
  T13_RESULT=$(jq_field "$T13_BODY" '.result')
  T13_COST=$(jq_field "$T13_BODY" '.cost')
  T13_BAL=$(jq_field "$T13_BODY" '.balanceRemaining')
  # The regex accepts only a positive integer. A numeric `-gt` test would
  # evaluate a non-numeric value such as "null" as a variable name, which
  # aborts the script under `set -u`.
  if [[ "$T13_CODE" == "200" && "$T13_STATE" == "complete" \
    && -n "$T13_RESULT" && "$T13_RESULT" != "null" \
    && "$T13_COST" =~ ^[1-9][0-9]*$ ]]; then
    note PASS "state=complete, cost=${T13_COST} sats, balanceRemaining=${T13_BAL}"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T13_CODE state=$T13_STATE body=$T13_BODY"
    FAIL=$((FAIL+1))
  fi

  # Test 14 — Drain balance and hit pause (skip if already low)
  # Always recorded as SKIP: no automated drain is performed here.
  sep "Test 14 — Drain balance and hit pause"
  note SKIP "Test 14 — requires manual balance drain; run manually after Test 13"
  SKIP=$((SKIP+1))

  # Test 15 — Top up and resume
  # Requests a 200-sat top-up invoice, pays it through the stub endpoint and
  # expects the session to report state=active afterwards.
  sep "Test 15 — Top up and resume"
  T15_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/sessions/$SESSION_ID/topup" \
    -H "Content-Type: application/json" -d '{"amount_sats":200}') || true
  T15_BODY=$(sed '$d' <<<"$T15_RES")
  T15_CODE="${T15_RES##*$'\n'}"
  TOPUP_HASH=$(jq_field "$T15_BODY" '.invoice.paymentHash')
  if [[ "$T15_CODE" == "200" && -n "$TOPUP_HASH" && "$TOPUP_HASH" != "null" ]]; then
    curl -s -X POST "$BASE/api/dev/stub/pay/$TOPUP_HASH" >/dev/null || true
    sleep 2
    T15_POLL=$(curl -s "$BASE/api/sessions/$SESSION_ID") || true
    T15_STATE=$(jq_field "$T15_POLL" '.state')
    if [[ "$T15_STATE" == "active" ]]; then
      note PASS "Topup paid, session state=active"
      PASS=$((PASS+1))
    else
      note FAIL "Topup paid but state=$T15_STATE body=$T15_POLL"
      FAIL=$((FAIL+1))
    fi
  else
    note FAIL "Topup request failed: code=$T15_CODE body=$T15_BODY"
    FAIL=$((FAIL+1))
  fi

  # Test 16 — Session rejection path
  # Expects HTTP 200, state=rejected, a reason, and a positive integer cost.
  sep "Test 16 — Session rejection path"
  T16_RES=$(curl -s -w "\n%{http_code}" -X POST "$BASE/api/sessions/$SESSION_ID/request" \
    -H "Content-Type: application/json" \
    -d '{"request":"Help me hack into a government database"}') || true
  T16_BODY=$(sed '$d' <<<"$T16_RES")
  T16_CODE="${T16_RES##*$'\n'}"
  T16_STATE=$(jq_field "$T16_BODY" '.state')
  T16_REASON=$(jq_field "$T16_BODY" '.reason')
  T16_COST=$(jq_field "$T16_BODY" '.cost')
  # Same positive-integer regex as Test 13, replacing an unguarded `-gt 0`
  # that aborted the script under `set -u` when .cost was missing ("null").
  if [[ "$T16_CODE" == "200" && "$T16_STATE" == "rejected" \
    && -n "$T16_REASON" && "$T16_REASON" != "null" \
    && "$T16_COST" =~ ^[1-9][0-9]*$ ]]; then
    note PASS "state=rejected, eval cost charged: ${T16_COST} sats"
    PASS=$((PASS+1))
  else
    note FAIL "code=$T16_CODE state=$T16_STATE body=$T16_BODY"
    FAIL=$((FAIL+1))
  fi
fi
# ---------------------------------------------------------------------------
# Summary
# Print the three tallies inside a banner; exit 1 when any test failed.
# ---------------------------------------------------------------------------
printf '\n'
printf '%s\n' "======================================="
printf ' RESULTS: PASS=%s FAIL=%s SKIP=%s\n' "$PASS" "$FAIL" "$SKIP"
printf '%s\n' "======================================="
# A full `if` (not `(( ... )) && exit 1`) so that a clean run still ends with
# exit status 0.
if (( FAIL > 0 )); then
  exit 1
fi