Task #27: Apply 3 required fixes for cost-routing + free-tier gate
1. Add `estimateRequestCost(requestText, modelId)` to PricingService in pricing.ts
   - Unified method combining estimateInputTokens + estimateOutputTokens + calculateWorkFeeUsd
   - Replaces the duplicated token-estimation logic at call sites in jobs.ts, sessions.ts, and estimate.ts
2. Move the partial free-tier `recordGrant()` call from invoice creation to post-work in runWorkInBackground
   - Previously it was called at invoice creation for the partial path (before the user pays) — an economic DoS vulnerability
   - It is now deferred until after work completes, using the new `partialAbsorbSats` parameter of runWorkInBackground
   - Fully-free jobs still record the grant at eval time (no payment is involved)
3. Sessions pre-gate refactor: estimate → decide → execute → reconcile
   - Free-tier `decide()` now runs on the ESTIMATED cost BEFORE `executeWork()` is called
   - After execution, `absorbedSats` is capped at the actual cost (Math.min) to prevent over-absorption
   - Uses the new `estimateRequestCost()` for clean, single-call estimation
This commit is contained in:
@@ -159,6 +159,20 @@ export class PricingService {
|
||||
return { amountSats, estimatedCostUsd, marginPct: this.marginPct, btcPriceUsd };
|
||||
}
|
||||
|
||||
/**
 * Estimates the cost of serving a request in a single call.
 *
 * Chains three helpers on this service — `estimateInputTokens`,
 * `estimateOutputTokens` and `calculateWorkFeeUsd` — so that call sites doing
 * pre-gate cost estimation do not have to repeat the sequence themselves.
 *
 * @param requestText - Request text handed to both token estimators.
 * @param modelId - Model identifier forwarded to `calculateWorkFeeUsd`.
 * @returns The two token estimates together with the USD fee derived from them.
 */
estimateRequestCost(
  requestText: string,
  modelId: string,
): { estimatedInputTokens: number; estimatedOutputTokens: number; estimatedCostUsd: number } {
  // Token estimates are taken first, input before output, then priced together.
  const inputTokens = this.estimateInputTokens(requestText);
  const outputTokens = this.estimateOutputTokens(requestText);

  return {
    estimatedInputTokens: inputTokens,
    estimatedOutputTokens: outputTokens,
    estimatedCostUsd: this.calculateWorkFeeUsd(inputTokens, outputTokens, modelId),
  };
}
|
||||
|
||||
// ── Post-work honest accounting ──────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
||||
@@ -25,10 +25,9 @@ router.get("/estimate", async (req: Request, res: Response) => {
|
||||
}
|
||||
|
||||
try {
|
||||
const inputTokens = pricingService.estimateInputTokens(requestText);
|
||||
const outputTokens = pricingService.estimateOutputTokens(requestText);
|
||||
const { estimatedInputTokens: inputTokens, estimatedOutputTokens: outputTokens, estimatedCostUsd: costUsd } =
|
||||
pricingService.estimateRequestCost(requestText, agentService.workModel);
|
||||
const btcPriceUsd = await getBtcPriceUsd();
|
||||
const costUsd = pricingService.calculateWorkFeeUsd(inputTokens, outputTokens, agentService.workModel);
|
||||
const estimatedSats = usdToSats(costUsd, btcPriceUsd);
|
||||
|
||||
// Optionally resolve Nostr identity from query param or header for free-tier preview
|
||||
|
||||
@@ -52,11 +52,10 @@ async function runEvalInBackground(
|
||||
});
|
||||
|
||||
if (evalResult.accepted) {
|
||||
const inputEst = pricingService.estimateInputTokens(request);
|
||||
const outputEst = pricingService.estimateOutputTokens(request);
|
||||
const { estimatedInputTokens, estimatedOutputTokens } = pricingService.estimateRequestCost(request, agentService.workModel);
|
||||
const breakdown = await pricingService.calculateWorkFeeSats(
|
||||
inputEst,
|
||||
outputEst,
|
||||
estimatedInputTokens,
|
||||
estimatedOutputTokens,
|
||||
agentService.workModel,
|
||||
);
|
||||
|
||||
@@ -135,11 +134,8 @@ async function runEvalInBackground(
|
||||
.where(eq(jobs.id, jobId));
|
||||
});
|
||||
|
||||
// Record partial grant immediately (reserves pool capacity)
|
||||
if (ftDecision.serve === "partial" && nostrPubkey) {
|
||||
const reqHash = createHash("sha256").update(request).digest("hex");
|
||||
void freeTierService.recordGrant(nostrPubkey, reqHash, ftDecision.absorbSats);
|
||||
}
|
||||
// Partial grant is recorded AFTER payment + work completes (in runWorkInBackground)
|
||||
// to avoid economic DoS where pool is reserved before the user ever pays.
|
||||
|
||||
eventBus.publish({ type: "job:state", jobId, state: "awaiting_work_payment" });
|
||||
} else {
|
||||
@@ -183,6 +179,7 @@ async function runWorkInBackground(
|
||||
btcPriceUsd: number | null,
|
||||
isFree = false,
|
||||
nostrPubkey: string | null = null,
|
||||
partialAbsorbSats = 0,
|
||||
): Promise<void> {
|
||||
const workStart = Date.now();
|
||||
try {
|
||||
@@ -240,6 +237,13 @@ async function runWorkInBackground(
|
||||
void freeTierService.credit(workAmountSats);
|
||||
}
|
||||
|
||||
// Record partial free-tier grant now that work is confirmed complete.
|
||||
// Deferred from invoice creation to prevent economic DoS (pool reservation without payment).
|
||||
if (!isFree && partialAbsorbSats > 0 && nostrPubkey) {
|
||||
const reqHash = createHash("sha256").update(request).digest("hex");
|
||||
void freeTierService.recordGrant(nostrPubkey, reqHash, partialAbsorbSats);
|
||||
}
|
||||
|
||||
// Trust scoring — fire and forget
|
||||
const pubkeyForTrust = nostrPubkey ?? (await getJobById(jobId))?.nostrPubkey ?? null;
|
||||
if (pubkeyForTrust) {
|
||||
@@ -337,6 +341,10 @@ async function advanceJob(job: Job): Promise<Job | null> {
|
||||
job.btcPriceUsd,
|
||||
job.freeTier ?? false,
|
||||
job.nostrPubkey ?? null,
|
||||
// For partial free-tier jobs (freeTier=true but user paid chargeSats),
|
||||
// pass absorbedSats so the grant is recorded post-payment in runWorkInBackground.
|
||||
// For fully-free jobs (isFree=true, workAmountSats=0), grant was already recorded at eval time.
|
||||
(job.freeTier && (job.workAmountSats ?? 0) > 0) ? (job.absorbedSats ?? 0) : 0,
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
@@ -328,6 +328,19 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
|
||||
let reason: string | null = null;
|
||||
let errorMessage: string | null = null;
|
||||
|
||||
// ── Pre-gate: free-tier decision on ESTIMATED cost before executing work ──
|
||||
// Estimate cost so we can commit a budget reservation before calling the LLM,
|
||||
// preventing a scenario where the pool is drained after we've already spent tokens.
|
||||
let ftDecision: import("../lib/free-tier.js").FreeTierDecision | null = null;
|
||||
if (evalResult.accepted && session.nostrPubkey) {
|
||||
const { estimatedCostUsd } = pricingService.estimateRequestCost(requestText, agentService.workModel);
|
||||
const estimatedSats = usdToSats(
|
||||
pricingService.calculateActualChargeUsd(estimatedCostUsd),
|
||||
btcPriceUsd,
|
||||
);
|
||||
ftDecision = await freeTierService.decide(session.nostrPubkey, estimatedSats);
|
||||
}
|
||||
|
||||
if (evalResult.accepted) {
|
||||
try {
|
||||
const workResult = await agentService.executeWork(requestText);
|
||||
@@ -353,20 +366,18 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
|
||||
const chargeUsd = pricingService.calculateActualChargeUsd(totalTokenCostUsd);
|
||||
const fullDebitSats = usdToSats(chargeUsd, btcPriceUsd);
|
||||
|
||||
// ── Free-tier gate (only on successful requests) ─────────────────────────
|
||||
// ── Reconcile free-tier decision against actual cost ──────────────────────
|
||||
// Cap absorbedSats at the actual cost so we never over-absorb from the pool.
|
||||
let debitedSats = fullDebitSats;
|
||||
let freeTierServed = false;
|
||||
let absorbedSats = 0;
|
||||
|
||||
if (finalState === "complete" && session.nostrPubkey) {
|
||||
const ftDecision = await freeTierService.decide(session.nostrPubkey, fullDebitSats);
|
||||
if (ftDecision.serve !== "gate") {
|
||||
absorbedSats = ftDecision.absorbSats;
|
||||
debitedSats = ftDecision.chargeSats;
|
||||
freeTierServed = true;
|
||||
const reqHash = createHash("sha256").update(requestText).digest("hex");
|
||||
void freeTierService.recordGrant(session.nostrPubkey, reqHash, absorbedSats);
|
||||
}
|
||||
if (finalState === "complete" && ftDecision && ftDecision.serve !== "gate") {
|
||||
absorbedSats = Math.min(ftDecision.absorbSats, fullDebitSats);
|
||||
debitedSats = Math.max(0, fullDebitSats - absorbedSats);
|
||||
freeTierServed = true;
|
||||
const reqHash = createHash("sha256").update(requestText).digest("hex");
|
||||
void freeTierService.recordGrant(session.nostrPubkey!, reqHash, absorbedSats);
|
||||
}
|
||||
|
||||
// Credit pool from paid portion (even if partial free tier)
|
||||
|
||||
Reference in New Issue
Block a user