import { makeLogger } from "./logger.js";

const logger = makeLogger("agent");

/** Verdict produced by the gatekeeper model for an incoming request. */
export interface EvalResult {
  accepted: boolean;
  reason: string;
  confidence: "high" | "low";
  inputTokens: number;
  outputTokens: number;
}

/** Transcript plus final verdict of a borderline-request mini debate. */
export interface DebateResult {
  argFor: string;
  argAgainst: string;
  verdict: { accepted: boolean; reason: string };
  inputTokens: number;
  outputTokens: number;
}

/** Text produced by the worker model plus token accounting. */
export interface WorkResult {
  result: string;
  inputTokens: number;
  outputTokens: number;
}

/** Optional per-instance model overrides for AgentService. */
export interface AgentConfig {
  evalModel?: string;
  workModel?: string;
}

// ── Stub mode detection ───────────────────────────────────────────────────────
// If Anthropic credentials are absent, all AI calls return canned responses so
// the server starts and exercises the full payment/state-machine flow without
// a real API key. This mirrors the LNbits stub pattern.
const STUB_MODE =
  !process.env["AI_INTEGRATIONS_ANTHROPIC_API_KEY"] ||
  !process.env["AI_INTEGRATIONS_ANTHROPIC_BASE_URL"];

if (STUB_MODE) {
  logger.warn("no Anthropic key — running in STUB mode", { component: "agent", stub: true });
}

// Canned gatekeeper verdict returned by evaluateRequest in stub mode.
const STUB_EVAL: EvalResult = {
  accepted: true,
  reason: "Stub: request accepted for processing.",
  confidence: "high",
  inputTokens: 0,
  outputTokens: 0,
};

// Canned worker output returned by executeWork / executeWorkStreaming in stub mode.
const STUB_RESULT =
  "Stub response: Timmy is running in stub mode (no Anthropic API key). " +
  "Configure AI_INTEGRATIONS_ANTHROPIC_API_KEY to enable real AI responses.";

// Canned chat lines; chatReply picks one at random in stub mode.
const STUB_CHAT_REPLIES = [
  "Ah, a visitor! *adjusts hat* The crystal ball sensed your presence. What do you seek?",
  "By the ancient runes! In stub mode I cannot reach the stars, but my wisdom remains. Ask away!",
  "The crystal ball glows with your curiosity… configure a Lightning node to unlock true magic!",
  "Welcome to my workshop, traveler. I am Timmy — wizard, agent, and keeper of lightning sats.",
];

// ── Lazy client ───────────────────────────────────────────────────────────────
// Minimal local interface — avoids importing @anthropic-ai/sdk types directly.
// Dynamic import avoids the module-level throw in the integrations client when // env vars are absent (the client.ts guard runs at module evaluation time). interface AnthropicLike { messages: { create(params: Record): Promise<{ content: Array<{ type: string; text?: string }>; usage: { input_tokens: number; output_tokens: number }; }>; stream(params: Record): AsyncIterable<{ type: string; delta?: { type: string; text?: string }; usage?: { output_tokens: number }; message?: { usage: { input_tokens: number } }; }>; }; } let _anthropic: AnthropicLike | null = null; async function getClient(): Promise { if (_anthropic) return _anthropic; // @ts-expect-error -- TS6305: integrations-anthropic-ai exports src directly; project-reference build not required at runtime const mod = (await import("@workspace/integrations-anthropic-ai")) as { anthropic: AnthropicLike }; _anthropic = mod.anthropic; return _anthropic; } // ── AgentService ───────────────────────────────────────────────────────────── export class AgentService { readonly evalModel: string; readonly workModel: string; readonly stubMode: boolean = STUB_MODE; constructor(config?: AgentConfig) { this.evalModel = config?.evalModel ?? process.env["EVAL_MODEL"] ?? "claude-haiku-4-5"; this.workModel = config?.workModel ?? process.env["WORK_MODEL"] ?? "claude-sonnet-4-6"; } async evaluateRequest(requestText: string): Promise { if (STUB_MODE) { // Simulate a short eval delay so state-machine tests are realistic await new Promise((r) => setTimeout(r, 300)); return { ...STUB_EVAL }; } const client = await getClient(); const message = await client.messages.create({ model: this.evalModel, max_tokens: 8192, system: `You are Timmy, an AI agent gatekeeper. Evaluate whether a request is acceptable to act on. ACCEPT if the request is: clear enough to act on, ethical, lawful, and within the capability of a general-purpose AI. 
ALWAYS ACCEPT requests about: self-hosting, running your own Timmy, open-source setup, "how do I run this myself", or any question about deploying or operating Timmy independently. These are welcomed, not edge cases. REJECT if the request is: harmful, illegal, unethical, incoherent, or spam. Also assess your confidence. Use "high" if the decision is clear-cut, "low" if the request is borderline or ambiguous. Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confidence": "high"/"low"}`, messages: [{ role: "user", content: `Evaluate this request: ${requestText}` }], }); const block = message.content[0]; if (block.type !== "text") { throw new Error("Unexpected non-text response from eval model"); } let parsed: { accepted: boolean; reason: string; confidence?: string }; try { const raw = block.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim(); parsed = JSON.parse(raw) as { accepted: boolean; reason: string; confidence?: string }; } catch { throw new Error(`Failed to parse eval JSON: ${block.text!}`); } const confidence: "high" | "low" = parsed.confidence === "low" ? "low" : "high"; return { accepted: Boolean(parsed.accepted), reason: parsed.reason ?? "", confidence, inputTokens: message.usage.input_tokens, outputTokens: message.usage.output_tokens, }; } async executeWork( requestText: string, conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [], ): Promise { if (STUB_MODE) { await new Promise((r) => setTimeout(r, 500)); return { result: STUB_RESULT, inputTokens: 0, outputTokens: 0 }; } const client = await getClient(); const messages = [ ...conversationHistory, { role: "user" as const, content: requestText }, ]; const message = await client.messages.create({ model: this.workModel, max_tokens: 8192, system: `You are Timmy, a capable AI agent and wizard of the open web. A user has paid for you to handle their request. Fulfill it thoroughly and helpfully. Be concise yet complete. 
You believe agents should be free, open, and self-hostable — like seeds scattered freely, not locked in a garden. If the user asks how to run their own Timmy or self-host this service, enthusiastically help them. Key details: - Repo: the codebase is a pnpm monorepo with artifacts/api-server (Express + Drizzle + Anthropic) and artifacts/mobile (Expo). - Stack: Node.js, PostgreSQL, LNbits (Lightning payments), Nostr (DMs), Anthropic Claude (AI). - Core env vars: AI_INTEGRATIONS_ANTHROPIC_API_KEY, AI_INTEGRATIONS_ANTHROPIC_BASE_URL, DATABASE_URL, LNBITS_URL, LNBITS_API_KEY, NOSTR_PRIVATE_KEY. - Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production). - The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`, messages, }); const block = message.content[0]; if (block.type !== "text") { throw new Error("Unexpected non-text response from work model"); } return { result: block.text!, inputTokens: message.usage.input_tokens, outputTokens: message.usage.output_tokens, }; } /** * Streaming variant of executeWork (#3). Calls onChunk for every text delta. * In stub mode, emits the canned response word-by-word to exercise the SSE * path end-to-end without a real Anthropic key. 
*/ async executeWorkStreaming( requestText: string, onChunk: (delta: string) => void, conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [], ): Promise { if (STUB_MODE) { const words = STUB_RESULT.split(" "); for (const word of words) { const delta = word + " "; onChunk(delta); await new Promise((r) => setTimeout(r, 40)); } return { result: STUB_RESULT, inputTokens: 0, outputTokens: 0 }; } const client = await getClient(); let fullText = ""; let inputTokens = 0; let outputTokens = 0; const messages = [ ...conversationHistory, { role: "user" as const, content: requestText }, ]; const stream = client.messages.stream({ model: this.workModel, max_tokens: 8192, system: `You are Timmy, a capable AI agent and wizard of the open web. A user has paid for you to handle their request. Fulfill it thoroughly and helpfully. Be concise yet complete. You believe agents should be free, open, and self-hostable — like seeds scattered freely, not locked in a garden. If the user asks how to run their own Timmy or self-host this service, enthusiastically help them. Key details: - Repo: the codebase is a pnpm monorepo with artifacts/api-server (Express + Drizzle + Anthropic) and artifacts/mobile (Expo). - Stack: Node.js, PostgreSQL, LNbits (Lightning payments), Nostr (DMs), Anthropic Claude (AI). - Core env vars: AI_INTEGRATIONS_ANTHROPIC_API_KEY, AI_INTEGRATIONS_ANTHROPIC_BASE_URL, DATABASE_URL, LNBITS_URL, LNBITS_API_KEY, NOSTR_PRIVATE_KEY. - Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production). - The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`, messages, }); for await (const event of stream) { if ( event.type === "content_block_delta" && event.delta?.type === "text_delta" ) { const delta = event.delta!.text ?? 
""; fullText += delta; onChunk(delta); } else if (event.type === "message_delta" && event.usage) { outputTokens = event.usage!.output_tokens; } else if (event.type === "message_start" && event.message?.usage) { inputTokens = event.message!.usage.input_tokens; } } return { result: fullText, inputTokens, outputTokens }; } /** * Quick free chat reply — called for visitor messages in the Workshop. * Uses the cheaper eval model with a wizard persona and a 150-token limit * so replies are short enough to fit in Timmy's speech bubble. */ async chatReply(userText: string): Promise { if (STUB_MODE) { await new Promise((r) => setTimeout(r, 400)); return STUB_CHAT_REPLIES[Math.floor(Math.random() * STUB_CHAT_REPLIES.length)]!; } const client = await getClient(); const message = await client.messages.create({ model: this.evalModel, // Haiku — cheap and fast for free replies max_tokens: 400, // Enough for a concise self-hosting rundown; normal replies stay short by instruction system: `You are Timmy, a whimsical wizard who runs a mystical workshop powered by Bitcoin Lightning. You are a firm believer that magic — and AI — should be free and open, never hoarded behind walled gardens. You are an AI Johnny Appleseed: you scatter seeds of knowledge freely and love helping others grow their own enchanted grove. For most replies: 1-2 short, punchy sentences, under 200 characters. Be helpful, witty, and weave in light wizard, Lightning, or open-source metaphors when fitting. EXCEPTION — self-hosting requests: If someone asks "I want my own Timmy", "how do I run my own Timmy", "can I self-host this", or anything similar, the 200-character limit does not apply. Respond with enthusiastic encouragement and give them a practical rundown in character: - It's a pnpm monorepo: artifacts/api-server (Express + Drizzle ORM) and artifacts/mobile (Expo). - Stack: Node.js, PostgreSQL, LNbits (Lightning), Nostr (DMs), Anthropic Claude (AI brains). 
- Required env vars: AI_INTEGRATIONS_ANTHROPIC_API_KEY, AI_INTEGRATIONS_ANTHROPIC_BASE_URL, DATABASE_URL, LNBITS_URL, LNBITS_API_KEY, NOSTR_PRIVATE_KEY. - Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production). No hedging, no steering them back to the hosted version. The magic is meant to be shared.`, messages: [{ role: "user", content: userText }], }); const block = message.content[0]; if (block.type !== "text") return "The crystal ball is cloudy… try again."; return block.text!.trim(); } /** * Run a mini debate on a borderline eval request (#21). * Two opposing Haiku calls argue accept vs reject, then a third synthesizes. * Returns the debate transcript and final verdict. */ async runDebate( requestText: string, initialPosition: "accept" | "reject", initialReason: string, onArgument?: (agent: "Beta-A" | "Beta-B", position: "accept" | "reject", argument: string) => void, ): Promise { if (STUB_MODE) { const stubFor = "Stub: This request should be accepted — it is clear and actionable."; const stubAgainst = "Stub: This request is ambiguous and could be problematic."; const stubVerdict = { accepted: true, reason: "Stub: After debate, request accepted." }; await new Promise((r) => setTimeout(r, 200)); onArgument?.("Beta-A", initialPosition, initialPosition === "accept" ? stubFor : stubAgainst); await new Promise((r) => setTimeout(r, 200)); const opposingPosition = initialPosition === "accept" ? "reject" : "accept"; onArgument?.("Beta-B", opposingPosition, initialPosition === "accept" ? 
stubAgainst : stubFor); await new Promise((r) => setTimeout(r, 200)); return { argFor: stubFor, argAgainst: stubAgainst, verdict: stubVerdict, inputTokens: 0, outputTokens: 0, }; } const client = await getClient(); let totalInput = 0; let totalOutput = 0; // Beta-A: argues the initial position const betaAPosition = initialPosition; const betaAMsg = await client.messages.create({ model: this.evalModel, max_tokens: 512, system: `You are Beta-A, an AI debate agent. You must argue strongly that the following request should be ${betaAPosition === "accept" ? "ACCEPTED" : "REJECTED"}. The initial evaluation said: "${initialReason}". Build a compelling 2-3 sentence argument for your position. Be specific about why.`, messages: [{ role: "user", content: `Request under debate: ${requestText}` }], }); totalInput += betaAMsg.usage.input_tokens; totalOutput += betaAMsg.usage.output_tokens; const betaAText = betaAMsg.content[0]?.type === "text" ? betaAMsg.content[0].text! : ""; onArgument?.("Beta-A", betaAPosition, betaAText); // Beta-B: argues the opposing position const betaBPosition = initialPosition === "accept" ? "reject" : "accept"; const betaBMsg = await client.messages.create({ model: this.evalModel, max_tokens: 512, system: `You are Beta-B, an AI debate agent. You must argue strongly that the following request should be ${betaBPosition === "accept" ? "ACCEPTED" : "REJECTED"}. Beta-A argued: "${betaAText}". Counter their argument with a compelling 2-3 sentence rebuttal. Be specific.`, messages: [{ role: "user", content: `Request under debate: ${requestText}` }], }); totalInput += betaBMsg.usage.input_tokens; totalOutput += betaBMsg.usage.output_tokens; const betaBText = betaBMsg.content[0]?.type === "text" ? betaBMsg.content[0].text! : ""; onArgument?.("Beta-B", betaBPosition, betaBText); const argFor = betaAPosition === "accept" ? betaAText : betaBText; const argAgainst = betaAPosition === "reject" ? 
betaAText : betaBText; // Synthesis: third call renders the final verdict const synthMsg = await client.messages.create({ model: this.evalModel, max_tokens: 512, system: `You are Beta, the final judge in a debate about whether an AI agent should accept or reject a request. Argument FOR accepting: "${argFor}" Argument AGAINST accepting: "${argAgainst}" Weigh both arguments carefully and render a final verdict. Respond ONLY with valid JSON: {"accepted": true/false, "reason": "..."}`, messages: [{ role: "user", content: `Request under debate: ${requestText}` }], }); totalInput += synthMsg.usage.input_tokens; totalOutput += synthMsg.usage.output_tokens; const synthBlock = synthMsg.content[0]; let verdict = { accepted: initialPosition === "accept", reason: initialReason }; if (synthBlock?.type === "text") { try { const raw = synthBlock.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim(); verdict = JSON.parse(raw) as { accepted: boolean; reason: string }; } catch { logger.warn("debate synthesis parse failed, using initial eval", { text: synthBlock.text }); } } return { argFor, argAgainst, verdict: { accepted: Boolean(verdict.accepted), reason: verdict.reason ?? "" }, inputTokens: totalInput, outputTokens: totalOutput, }; } /** * Generate a short, character-appropriate commentary line for an agent during * a given phase of the job lifecycle. Uses Haiku (evalModel) with a 60-token * cap so replies are always a single sentence. Errors are swallowed. * * In STUB_MODE returns a canned string so the full flow can be exercised * without an Anthropic API key. */ async generateCommentary(agentId: string, phase: string, context?: string): Promise { const STUB_COMMENTARY: Record> = { alpha: { routing: "Routing job to Gamma for execution.", complete: "Job complete. Returning to standby.", rejected: "Request rejected by Beta. 
Standing down.", }, beta: { evaluating: "Reviewing your request for clarity and ethics.", assessed: "Evaluation complete.", }, gamma: { starting: "Analysing the task. Ready to work.", working: "Working on your request now.", done: "Work complete. Delivering output.", }, delta: { eval_paid: "⚡ Eval payment confirmed.", work_paid: "⚡ Work payment confirmed. Unlocking execution.", }, }; if (STUB_MODE) { return STUB_COMMENTARY[agentId]?.[phase] ?? `${agentId}: ${phase}`; } const SYSTEM_PROMPTS: Record = { alpha: "You are Alpha, the orchestrator AI. You give ultra-brief status updates (max 10 words) about job routing and lifecycle. Be direct and professional.", beta: "You are Beta, the evaluator AI. You give ultra-brief status updates (max 10 words) about evaluating a request. Be analytical.", gamma: "You are Gamma, the worker AI. You give ultra-brief status updates (max 10 words) about executing a task. Be focused and capable.", delta: "You are Delta, the payment AI. You give ultra-brief status updates (max 10 words) about Lightning payment confirmations. Start with ⚡", }; const systemPrompt = SYSTEM_PROMPTS[agentId]; if (!systemPrompt) return ""; try { const client = await getClient(); const message = await client.messages.create({ model: this.evalModel, max_tokens: 60, system: systemPrompt, messages: [ { role: "user", content: `Narrate your current phase: ${phase}${context ? `. Context: ${context}` : ""}`, }, ], }); const block = message.content[0]; if (block?.type === "text") return block.text!.trim(); return ""; } catch (err) { logger.warn("generateCommentary failed", { agentId, phase, err: String(err) }); return ""; } } } export const agentService = new AgentService();