Task #27: Apply 3 required fixes for cost-routing + free-tier gate

1. Add `estimateRequestCost(requestText, modelId)` to PricingService in pricing.ts
   - Unified method combining estimateInputTokens + estimateOutputTokens + calculateWorkFeeUsd
   - Replaces duplicated token estimation logic at call sites in jobs.ts, sessions.ts, estimate.ts

2. Move partial free-tier `recordGrant()` from invoice creation to post-work in runWorkInBackground
   - Previously called at invoice creation for partial path (before user pays) — economic DoS vulnerability
   - Now deferred to after work completes, using new `partialAbsorbSats` parameter in runWorkInBackground
   - Fully-free jobs still record grant at eval time (no payment involved)

3. Sessions pre-gate refactor: estimate → decide → execute → reconcile
   - Free-tier `decide()` now runs on ESTIMATED cost BEFORE `executeWork()` is called
   - After execution, `absorbedSats` is capped at actual cost (Math.min) to prevent over-absorption
   - Uses new `estimateRequestCost()` for clean single-call estimation
This commit is contained in:
alexpaynex
2026-03-19 16:43:41 +00:00
parent 4c3a0e867a
commit 512089ca08
4 changed files with 54 additions and 22 deletions

View File

@@ -159,6 +159,20 @@ export class PricingService {
return { amountSats, estimatedCostUsd, marginPct: this.marginPct, btcPriceUsd };
}
/**
 * One-stop pre-gate cost estimate for a request.
 *
 * Derives the estimated input and output token counts from the request text,
 * then prices that pair for the given model via `calculateWorkFeeUsd`.
 * Intended as the single call-site for estimation so routes do not repeat
 * the three-step sequence themselves.
 *
 * @param requestText - Request text that both token estimates are derived from.
 * @param modelId - Model identifier forwarded to `calculateWorkFeeUsd`.
 * @returns The two token estimates together with the resulting USD cost.
 */
estimateRequestCost(
  requestText: string,
  modelId: string,
): { estimatedInputTokens: number; estimatedOutputTokens: number; estimatedCostUsd: number } {
  // Token estimates first: the fee calculation consumes both of them.
  const inputTokens = this.estimateInputTokens(requestText);
  const outputTokens = this.estimateOutputTokens(requestText);

  return {
    estimatedInputTokens: inputTokens,
    estimatedOutputTokens: outputTokens,
    estimatedCostUsd: this.calculateWorkFeeUsd(inputTokens, outputTokens, modelId),
  };
}
// ── Post-work honest accounting ──────────────────────────────────────────
/**

View File

@@ -25,10 +25,9 @@ router.get("/estimate", async (req: Request, res: Response) => {
}
try {
const inputTokens = pricingService.estimateInputTokens(requestText);
const outputTokens = pricingService.estimateOutputTokens(requestText);
const { estimatedInputTokens: inputTokens, estimatedOutputTokens: outputTokens, estimatedCostUsd: costUsd } =
pricingService.estimateRequestCost(requestText, agentService.workModel);
const btcPriceUsd = await getBtcPriceUsd();
const costUsd = pricingService.calculateWorkFeeUsd(inputTokens, outputTokens, agentService.workModel);
const estimatedSats = usdToSats(costUsd, btcPriceUsd);
// Optionally resolve Nostr identity from query param or header for free-tier preview

View File

@@ -52,11 +52,10 @@ async function runEvalInBackground(
});
if (evalResult.accepted) {
const inputEst = pricingService.estimateInputTokens(request);
const outputEst = pricingService.estimateOutputTokens(request);
const { estimatedInputTokens, estimatedOutputTokens } = pricingService.estimateRequestCost(request, agentService.workModel);
const breakdown = await pricingService.calculateWorkFeeSats(
inputEst,
outputEst,
estimatedInputTokens,
estimatedOutputTokens,
agentService.workModel,
);
@@ -135,11 +134,8 @@ async function runEvalInBackground(
.where(eq(jobs.id, jobId));
});
// Record partial grant immediately (reserves pool capacity)
if (ftDecision.serve === "partial" && nostrPubkey) {
const reqHash = createHash("sha256").update(request).digest("hex");
void freeTierService.recordGrant(nostrPubkey, reqHash, ftDecision.absorbSats);
}
// Partial grant is recorded AFTER payment + work completes (in runWorkInBackground)
// to avoid economic DoS where pool is reserved before the user ever pays.
eventBus.publish({ type: "job:state", jobId, state: "awaiting_work_payment" });
} else {
@@ -183,6 +179,7 @@ async function runWorkInBackground(
btcPriceUsd: number | null,
isFree = false,
nostrPubkey: string | null = null,
partialAbsorbSats = 0,
): Promise<void> {
const workStart = Date.now();
try {
@@ -240,6 +237,13 @@ async function runWorkInBackground(
void freeTierService.credit(workAmountSats);
}
// Record partial free-tier grant now that work is confirmed complete.
// Deferred from invoice creation to prevent economic DoS (pool reservation without payment).
if (!isFree && partialAbsorbSats > 0 && nostrPubkey) {
const reqHash = createHash("sha256").update(request).digest("hex");
void freeTierService.recordGrant(nostrPubkey, reqHash, partialAbsorbSats);
}
// Trust scoring — fire and forget
const pubkeyForTrust = nostrPubkey ?? (await getJobById(jobId))?.nostrPubkey ?? null;
if (pubkeyForTrust) {
@@ -337,6 +341,10 @@ async function advanceJob(job: Job): Promise<Job | null> {
job.btcPriceUsd,
job.freeTier ?? false,
job.nostrPubkey ?? null,
// For partial free-tier jobs (freeTier=true but user paid chargeSats),
// pass absorbedSats so the grant is recorded post-payment in runWorkInBackground.
// For fully-free jobs (isFree=true, workAmountSats=0), grant was already recorded at eval time.
(job.freeTier && (job.workAmountSats ?? 0) > 0) ? (job.absorbedSats ?? 0) : 0,
);
});

View File

@@ -328,6 +328,19 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
let reason: string | null = null;
let errorMessage: string | null = null;
// ── Pre-gate: free-tier decision on ESTIMATED cost before executing work ──
// Estimate cost so we can commit a budget reservation before calling the LLM,
// preventing a scenario where the pool is drained after we've already spent tokens.
let ftDecision: import("../lib/free-tier.js").FreeTierDecision | null = null;
if (evalResult.accepted && session.nostrPubkey) {
const { estimatedCostUsd } = pricingService.estimateRequestCost(requestText, agentService.workModel);
const estimatedSats = usdToSats(
pricingService.calculateActualChargeUsd(estimatedCostUsd),
btcPriceUsd,
);
ftDecision = await freeTierService.decide(session.nostrPubkey, estimatedSats);
}
if (evalResult.accepted) {
try {
const workResult = await agentService.executeWork(requestText);
@@ -353,20 +366,18 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
const chargeUsd = pricingService.calculateActualChargeUsd(totalTokenCostUsd);
const fullDebitSats = usdToSats(chargeUsd, btcPriceUsd);
// ── Free-tier gate (only on successful requests) ─────────────────────────
// ── Reconcile free-tier decision against actual cost ──────────────────────
// Cap absorbedSats at the actual cost so we never over-absorb from the pool.
let debitedSats = fullDebitSats;
let freeTierServed = false;
let absorbedSats = 0;
if (finalState === "complete" && session.nostrPubkey) {
const ftDecision = await freeTierService.decide(session.nostrPubkey, fullDebitSats);
if (ftDecision.serve !== "gate") {
absorbedSats = ftDecision.absorbSats;
debitedSats = ftDecision.chargeSats;
freeTierServed = true;
const reqHash = createHash("sha256").update(requestText).digest("hex");
void freeTierService.recordGrant(session.nostrPubkey, reqHash, absorbedSats);
}
if (finalState === "complete" && ftDecision && ftDecision.serve !== "gate") {
absorbedSats = Math.min(ftDecision.absorbSats, fullDebitSats);
debitedSats = Math.max(0, fullDebitSats - absorbedSats);
freeTierServed = true;
const reqHash = createHash("sha256").update(requestText).digest("hex");
void freeTierService.recordGrant(session.nostrPubkey!, reqHash, absorbedSats);
}
// Credit pool from paid portion (even if partial free tier)