1. Add `estimateRequestCost(request, model)` to PricingService in pricing.ts
   - Unified method combining estimateInputTokens + estimateOutputTokens + calculateWorkFeeUsd
   - Replaces duplicated token estimation logic at call sites in jobs.ts, sessions.ts, estimate.ts
2. Move partial free-tier `recordGrant()` from invoice creation to post-work in runWorkInBackground
   - Previously called at invoice creation for the partial path (before the user pays), which was an economic DoS vulnerability
   - Now deferred until after work completes, using the new `partialAbsorbSats` parameter in runWorkInBackground
   - Fully-free jobs still record the grant at eval time (no payment involved)
3. Sessions pre-gate refactor: estimate → decide → execute → reconcile
   - Free-tier `decide()` now runs on the ESTIMATED cost BEFORE `executeWork()` is called
   - After execution, `absorbedSats` is capped at the actual cost (Math.min) to prevent over-absorption
   - Uses the new `estimateRequestCost()` for clean single-call estimation
207 lines
8.7 KiB
TypeScript
207 lines
8.7 KiB
TypeScript
import { getBtcPriceUsd, usdToSats } from "./btc-oracle.js";
|
|
|
|
// ── Env-var helpers ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Read a positive floating-point setting from the environment.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value returned when the variable is unset, unparsable,
 *   non-finite, or not strictly greater than zero.
 * @returns The parsed value, or `fallback`.
 */
function envFloat(name: string, fallback: number): number {
  const parsed = Number.parseFloat(process.env[name] ?? "");
  // NaN and ±Infinity fail the finite check; zero and negatives are rejected too.
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return fallback;
  }
  return parsed;
}
|
|
|
|
/**
 * Read a positive integer setting from the environment (base 10).
 *
 * @param name - Environment variable to read.
 * @param fallback - Value returned when the variable is unset, unparsable,
 *   or not strictly greater than zero.
 * @returns The parsed integer, or `fallback`.
 */
function envInt(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? "", 10);
  const usable = Number.isFinite(parsed) && parsed > 0;
  return usable ? parsed : fallback;
}
|
|
|
|
// ── Model rate tables ──────────────────────────────────────────────────────────
|
|
|
|
/** Token pricing for one model family, in USD per 1 000 tokens. */
export interface ModelRates {
  /** Price of 1 000 input (prompt) tokens, in USD. */
  inputPer1kUsd: number;
  /** Price of 1 000 output (completion) tokens, in USD. */
  outputPer1kUsd: number;
}
|
|
|
|
/**
 * Per-model rate tables (USD per 1 000 tokens).
 *
 * The built-in defaults approximate claude-haiku-4-x / claude-sonnet-4-x
 * pricing; each figure can be overridden through the environment variable
 * named in its `envFloat` call.
 */
const HAIKU_RATES: ModelRates = {
  inputPer1kUsd: envFloat("HAIKU_INPUT_COST_PER_1K_TOKENS", 0.0008),
  outputPer1kUsd: envFloat("HAIKU_OUTPUT_COST_PER_1K_TOKENS", 0.004),
};

const SONNET_RATES: ModelRates = {
  inputPer1kUsd: envFloat("SONNET_INPUT_COST_PER_1K_TOKENS", 0.003),
  outputPer1kUsd: envFloat("SONNET_OUTPUT_COST_PER_1K_TOKENS", 0.015),
};
|
|
|
|
/**
 * Pick the rate table for a model id (case-insensitive substring match).
 *
 * Ids containing "haiku" get the Haiku rates. Everything else, including
 * ids containing "sonnet" and unrecognised ids, gets the Sonnet rates as a
 * conservative fallback.
 */
function ratesForModel(modelId: string): ModelRates {
  const normalized = modelId.toLowerCase();
  // "haiku" is tested first, so an id naming both families resolves to Haiku.
  return normalized.includes("haiku") ? HAIKU_RATES : SONNET_RATES;
}
|
|
|
|
// ── Output token estimates by request length tier ─────────────────────────────
|
|
|
|
// Estimated output tokens for each request-length tier, and the character
// thresholds (inclusive upper bounds) that separate the tiers.
const OUTPUT_EST_SHORT = envInt("OUTPUT_TOKENS_SHORT_EST", 200);
const OUTPUT_EST_MEDIUM = envInt("OUTPUT_TOKENS_MEDIUM_EST", 400);
const OUTPUT_EST_LONG = envInt("OUTPUT_TOKENS_LONG_EST", 800);
const SHORT_MAX_CHARS = envInt("SHORT_MAX_CHARS", 100);
const MEDIUM_MAX_CHARS = envInt("MEDIUM_MAX_CHARS", 300);

// Approximate tokens in the work system prompt (for input token estimation).
const WORK_SYSTEM_PROMPT_TOKENS = envInt("WORK_SYSTEM_PROMPT_TOKENS_EST", 50);

// ── DO infra amortisation ─────────────────────────────────────────────────────

const DO_MONTHLY_COST_USD = envFloat("DO_MONTHLY_COST_USD", 100);
const DO_MONTHLY_REQUESTS = envInt("DO_MONTHLY_REQUEST_VOLUME", 1000);
// Monthly infra cost spread evenly over the expected request volume.
// envInt only returns values > 0, so the divisor is never zero.
const DO_INFRA_PER_REQUEST_USD = DO_MONTHLY_COST_USD / DO_MONTHLY_REQUESTS;

// ── Originator margin ──────────────────────────────────────────────────────────

const ORIGINATOR_MARGIN_PCT = envFloat("ORIGINATOR_MARGIN_PCT", 25);

// ── Fixed fees ────────────────────────────────────────────────────────────────

const EVAL_FEE_SATS = envInt("EVAL_FEE_SATS", 10);

// Uses the shared envInt helper like every other constant here; it applies the
// same base-10 parse and "finite and > 0, else fallback" rule that was
// previously duplicated inline.
const BOOTSTRAP_FEE_SATS = envInt("BOOTSTRAP_FEE_SATS", 10_000);
|
|
|
|
// ── Public types ──────────────────────────────────────────────────────────────
|
|
|
|
/** Result of pricing a work request in sats, with the inputs used to derive it. */
export interface WorkFeeBreakdown {
  /** Invoice amount in satoshis (the USD estimate converted at `btcPriceUsd`). */
  amountSats: number;
  /** Estimated cost of the work in USD. */
  estimatedCostUsd: number;
  /** Margin percentage reported alongside the estimate. */
  marginPct: number;
  /** BTC price in USD used for the USD → sats conversion. */
  btcPriceUsd: number;
}
|
|
|
|
// ── PricingService ────────────────────────────────────────────────────────────
|
|
|
|
/**
 * Pricing calculations for work requests: fixed fees, token estimation,
 * USD cost (with infra amortisation and margin), USD → sats conversion,
 * and post-work charge/refund accounting.
 */
export class PricingService {
  /** Originator margin, in percent, applied on top of the raw cost. */
  readonly marginPct = ORIGINATOR_MARGIN_PCT;

  // ── Fixed fees (unchanged from v1) ───────────────────────────────────────

  /** Flat evaluation fee, in sats. */
  calculateEvalFeeSats(): number {
    return EVAL_FEE_SATS;
  }

  /** Flat bootstrap fee, in sats. */
  calculateBootstrapFeeSats(): number {
    return BOOTSTRAP_FEE_SATS;
  }

  // ── Token estimation ─────────────────────────────────────────────────────

  /**
   * Estimate input tokens for a work request.
   * Uses the chars/4 rule for the user message (untrimmed length, rounded up)
   * plus a fixed system-prompt overhead.
   */
  estimateInputTokens(requestText: string): number {
    return Math.ceil(requestText.length / 4) + WORK_SYSTEM_PROMPT_TOKENS;
  }

  /**
   * Estimate output tokens from the request's length tier.
   * The tier is chosen from the trimmed character count: short, medium, or long.
   */
  estimateOutputTokens(requestText: string): number {
    const len = requestText.trim().length;
    if (len <= SHORT_MAX_CHARS) return OUTPUT_EST_SHORT;
    if (len <= MEDIUM_MAX_CHARS) return OUTPUT_EST_MEDIUM;
    return OUTPUT_EST_LONG;
  }

  // ── Cost calculation (pure, no oracle) ──────────────────────────────────

  /**
   * Calculate the total USD cost for a set of token counts + model.
   * Includes DO infra amortisation and originator margin.
   *
   * Composed from calculateActualCostUsd and calculateActualChargeUsd so the
   * pre-work estimate and the post-work charge always share one formula.
   */
  calculateWorkFeeUsd(inputTokens: number, outputTokens: number, modelId: string): number {
    return this.calculateActualChargeUsd(
      this.calculateActualCostUsd(inputTokens, outputTokens, modelId),
    );
  }

  /**
   * Calculate actual token cost (no infra, no margin — raw Anthropic spend).
   * Used for the post-work cost ledger.
   */
  calculateActualCostUsd(inputTokens: number, outputTokens: number, modelId: string): number {
    const rates = ratesForModel(modelId);
    return (
      (inputTokens / 1000) * rates.inputPer1kUsd +
      (outputTokens / 1000) * rates.outputPer1kUsd
    );
  }

  // ── Invoice amount (calls oracle) ────────────────────────────────────────

  /**
   * Fetch BTC price, convert USD cost to sats, and return the full breakdown.
   * This is the main entry point for generating a work invoice amount.
   *
   * @returns The sats amount together with the USD estimate, margin
   *   percentage, and BTC price it was derived from.
   */
  async calculateWorkFeeSats(
    inputTokens: number,
    outputTokens: number,
    modelId: string,
  ): Promise<WorkFeeBreakdown> {
    const estimatedCostUsd = this.calculateWorkFeeUsd(inputTokens, outputTokens, modelId);
    const btcPriceUsd = await getBtcPriceUsd();
    const amountSats = usdToSats(estimatedCostUsd, btcPriceUsd);
    return { amountSats, estimatedCostUsd, marginPct: this.marginPct, btcPriceUsd };
  }

  /**
   * Combined estimate: input tokens + output tokens + work fee USD for a given request.
   * Single call-site for pre-gate cost estimation — replaces duplicated logic in routes.
   */
  estimateRequestCost(
    requestText: string,
    modelId: string,
  ): { estimatedInputTokens: number; estimatedOutputTokens: number; estimatedCostUsd: number } {
    const estimatedInputTokens = this.estimateInputTokens(requestText);
    const estimatedOutputTokens = this.estimateOutputTokens(requestText);
    const estimatedCostUsd = this.calculateWorkFeeUsd(
      estimatedInputTokens,
      estimatedOutputTokens,
      modelId,
    );
    return { estimatedInputTokens, estimatedOutputTokens, estimatedCostUsd };
  }

  // ── Post-work honest accounting ──────────────────────────────────────────

  /**
   * Full actual charge in USD: raw Anthropic token cost + DO infra amortisation + margin.
   * Pass in the already-computed actualCostUsd (raw token cost, no extras).
   */
  calculateActualChargeUsd(actualCostUsd: number): number {
    const rawCostUsd = actualCostUsd + DO_INFRA_PER_REQUEST_USD;
    return rawCostUsd * (1 + this.marginPct / 100);
  }

  /**
   * Convert the actual charge to satoshis using the BTC price that was locked
   * at invoice-creation time. This keeps pre- and post-work accounting in the
   * same BTC denomination without a second oracle call.
   */
  calculateActualChargeSats(actualCostUsd: number, lockedBtcPriceUsd: number): number {
    const chargeUsd = this.calculateActualChargeUsd(actualCostUsd);
    return usdToSats(chargeUsd, lockedBtcPriceUsd);
  }

  /**
   * Refund amount in sats: what was overpaid by the user.
   * Always >= 0 (clamped — never ask the user to top up due to BTC price swings).
   */
  calculateRefundSats(workAmountSats: number, actualAmountSats: number): number {
    return Math.max(0, workAmountSats - actualAmountSats);
  }
}
|
|
|
|
/** Module-level PricingService instance exported for callers to share. */
export const pricingService = new PricingService();
|