WIP: Gemini Code progress on #70

Automated salvage commit — the agent session ended with exit code 0.
This is work in progress and may require continuation.
This commit is contained in:
Alexander Whitestone
2026-03-23 16:51:37 -04:00
parent 82a170da87
commit 6ccf8ffd70

View File

@@ -1,4 +1,7 @@
import { makeLogger } from "./logger.js";
import { pricingService } from "./pricing.js";
import { usdToSats } from "./btc-oracle.js";
import { eventBus } from "../lib/event-bus.js";
const logger = makeLogger("agent");
@@ -24,6 +27,46 @@ export interface WorkResult {
outputTokens: number;
}
interface Tool {
name: string;
description: string;
input_schema: {
type: "object";
properties: {
[key: string]: {
type: string;
description?: string;
enum?: string[];
};
};
required: string[];
};
}
const DELEGATE_TASK_TOOL: Tool = {
name: "delegate_task",
description: "Delegates a complex task to a specialized sub-agent, paying them in Lightning satoshis.",
input_schema: {
type: "object",
properties: {
specialty: {
type: "string",
description: "The required persona or expertise for the sub-agent (e.g., 'Security Auditor', 'Frontend Developer').",
},
subtask_description: {
type: "string",
description: "The specific, atomic task to be performed by the sub-agent.",
},
budget_sats: {
type: "number",
description: "The maximum amount of Lightning satoshis allocated for this sub-task, deducted from the parent session.",
},
},
required: ["specialty", "subtask_description", "budget_sats"],
},
};
export interface AgentConfig {
evalModel?: string;
workModel?: string;
@@ -148,6 +191,7 @@ Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confide
async executeWork(
requestText: string,
conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [],
systemPrompt?: string,
): Promise<WorkResult> {
if (STUB_MODE) {
await new Promise((r) => setTimeout(r, 500));
@@ -172,30 +216,41 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
- Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production).
- The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`,
messages,
tools: [DELEGATE_TASK_TOOL],
});
const block = message.content[0];
if (block.type !== "text") {
throw new Error("Unexpected non-text response from work model");
const toolUseBlock = message.content.find((block) => block.type === "tool_use");
if (toolUseBlock && toolUseBlock.type === "tool_use") {
logger.info("Tool use detected:", toolUseBlock);
if (toolUseBlock.name === DELEGATE_TASK_TOOL.name) {
const { specialty, subtask_description, budget_sats } = toolUseBlock.input as {
specialty: string;
subtask_description: string;
budget_sats: number;
};
return await this.delegateTask(specialty, subtask_description, budget_sats, messages);
} else {
return {
result: `Unknown tool '${toolUseBlock.name}' called with args: ${JSON.stringify(toolUseBlock.input)}. Delegation not performed.`,
inputTokens: message.usage.input_tokens,
outputTokens: message.usage.output_tokens,
};
}
} else {
const block = message.content[0];
if (block.type !== "text") {
throw new Error("Unexpected non-text response from work model");
}
return {
result: block.text!,
inputTokens: message.usage.input_tokens,
outputTokens: message.usage.output_tokens,
};
}
return {
result: block.text!,
inputTokens: message.usage.input_tokens,
outputTokens: message.usage.output_tokens,
};
}
/**
* Streaming variant of executeWork (#3). Calls onChunk for every text delta.
* In stub mode, emits the canned response word-by-word to exercise the SSE
* path end-to-end without a real Anthropic key.
*/
async executeWorkStreaming(
requestText: string,
onChunk: (delta: string) => void,
conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [],
): Promise<WorkResult> {
if (STUB_MODE) {
const words = STUB_RESULT.split(" ");
for (const word of words) {
@@ -218,7 +273,7 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
const stream = client.messages.stream({
model: this.workModel,
max_tokens: 8192,
system: `You are Timmy, a capable AI agent and wizard of the open web. A user has paid for you to handle their request.
system: systemPrompt ?? `You are Timmy, a capable AI agent and wizard of the open web. A user has paid for you to handle their request.
Fulfill it thoroughly and helpfully. Be concise yet complete.
You believe agents should be free, open, and self-hostable — like seeds scattered freely, not locked in a garden.
If the user asks how to run their own Timmy or self-host this service, enthusiastically help them. Key details:
@@ -228,10 +283,32 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
- Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production).
- The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`,
messages,
tools: [DELEGATE_TASK_TOOL],
});
for await (const event of stream) {
if (
if (event.type === "content_block_start" && event.content_block.type === "tool_use") {
const toolUse = event.content_block;
logger.info("Streaming Tool use detected:", toolUse);
if (toolUse.name === DELEGATE_TASK_TOOL.name) {
const { specialty, subtask_description, budget_sats } = toolUse.input as {
specialty: string;
subtask_description: string;
budget_sats: number;
};
const subTaskResult = await this.delegateTask(specialty, subtask_description, budget_sats, messages);
fullText += subTaskResult.result;
onChunk(subTaskResult.result);
inputTokens += subTaskResult.inputTokens;
outputTokens += subTaskResult.outputTokens;
} else {
const toolMessage = `Unknown tool '${toolUse.name}' called with args: ${JSON.stringify(toolUse.input)}. Delegation not performed.`;
fullText += toolMessage;
onChunk(toolMessage);
}
break; // Assuming one tool call per turn for now, exit stream processing.
} else if (
event.type === "content_block_delta" &&
event.delta?.type === "text_delta"
) {
@@ -248,6 +325,45 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
return { result: fullText, inputTokens, outputTokens };
}
async delegateTask(
specialty: string,
subtaskDescription: string,
budgetSats: number,
conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [],
): Promise<WorkResult> {
logger.info(`Delegating task to ${specialty} with budget ${budgetSats} sats: ${subtaskDescription}`);
eventBus.publish({
type: "agent:delegation",
specialty,
subtaskDescription,
budgetSats,
// Optionally, add a unique ID for this delegation event if needed for frontend tracking
});
const subAgentSystemPrompt = `You are a specialized AI agent with expertise in ${specialty}. Your task is to fulfill the given subtask description. Be concise and precise in your response, focusing solely on the task at hand. Do not engage in any activities outside your specialty. Your budget for this task is ${budgetSats} satoshis.`;
const subAgentService = new AgentService({
evalModel: this.evalModel,
workModel: this.workModel,
});
const subTaskResult = await subAgentService.executeWork(
subtaskDescription,
conversationHistory,
subAgentSystemPrompt,
);
const subTaskCostUsd = pricingService.calculateActualCostUsd(
subTaskResult.inputTokens,
subTaskResult.outputTokens,
this.workModel,
);
logger.info(`Sub-agent work for '${specialty}' cost: $${subTaskCostUsd.toFixed(2)} USD.`);
return subTaskResult;
}
/**
* Quick free chat reply — called for visitor messages in the Workshop.
* Uses the cheaper eval model with a wizard persona and a 150-token limit