Task #27: Apply 3 required fixes for cost-routing + free-tier gate

1. Add `estimateRequestCost(request, model)` to PricingService in pricing.ts
   - Unified method combining estimateInputTokens + estimateOutputTokens + calculateWorkFeeUsd
   - Replaces duplicated token estimation logic at call sites in jobs.ts, sessions.ts, estimate.ts

2. Move partial free-tier `recordGrant()` from invoice creation to post-work in runWorkInBackground
   - Previously it was called at invoice creation on the partial path (before the user pays) — an economic DoS vulnerability, because grants were recorded for invoices that might never be paid
   - Now deferred until after the work completes, via a new `partialAbsorbSats` parameter on runWorkInBackground
   - Fully-free jobs still record grant at eval time (no payment involved)

3. Sessions pre-gate refactor: estimate → decide → execute → reconcile
   - Free-tier `decide()` now runs on ESTIMATED cost BEFORE `executeWork()` is called
   - After execution, `absorbedSats` is capped at actual cost (Math.min) to prevent over-absorption
   - Uses new `estimateRequestCost()` for clean single-call estimation
This commit is contained in:
alexpaynex
2026-03-19 16:43:41 +00:00
parent 4c3a0e867a
commit 512089ca08
4 changed files with 54 additions and 22 deletions

View File

@@ -328,6 +328,19 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
let reason: string | null = null;
let errorMessage: string | null = null;
// ── Pre-gate: free-tier decision on ESTIMATED cost before executing work ──
// Estimate cost so we can commit a budget reservation before calling the LLM,
// preventing a scenario where the pool is drained after we've already spent tokens.
let ftDecision: import("../lib/free-tier.js").FreeTierDecision | null = null;
if (evalResult.accepted && session.nostrPubkey) {
const { estimatedCostUsd } = pricingService.estimateRequestCost(requestText, agentService.workModel);
const estimatedSats = usdToSats(
pricingService.calculateActualChargeUsd(estimatedCostUsd),
btcPriceUsd,
);
ftDecision = await freeTierService.decide(session.nostrPubkey, estimatedSats);
}
if (evalResult.accepted) {
try {
const workResult = await agentService.executeWork(requestText);
@@ -353,20 +366,18 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
const chargeUsd = pricingService.calculateActualChargeUsd(totalTokenCostUsd);
const fullDebitSats = usdToSats(chargeUsd, btcPriceUsd);
// ── Free-tier gate (only on successful requests) ─────────────────────────
// ── Reconcile free-tier decision against actual cost ──────────────────────
// Cap absorbedSats at the actual cost so we never over-absorb from the pool.
let debitedSats = fullDebitSats;
let freeTierServed = false;
let absorbedSats = 0;
if (finalState === "complete" && session.nostrPubkey) {
const ftDecision = await freeTierService.decide(session.nostrPubkey, fullDebitSats);
if (ftDecision.serve !== "gate") {
absorbedSats = ftDecision.absorbSats;
debitedSats = ftDecision.chargeSats;
freeTierServed = true;
const reqHash = createHash("sha256").update(requestText).digest("hex");
void freeTierService.recordGrant(session.nostrPubkey, reqHash, absorbedSats);
}
if (finalState === "complete" && ftDecision && ftDecision.serve !== "gate") {
absorbedSats = Math.min(ftDecision.absorbSats, fullDebitSats);
debitedSats = Math.max(0, fullDebitSats - absorbedSats);
freeTierServed = true;
const reqHash = createHash("sha256").update(requestText).digest("hex");
void freeTierService.recordGrant(session.nostrPubkey!, reqHash, absorbedSats);
}
// Credit pool from paid portion (even if partial free tier)