feat: inject conversation history into session work model
Some checks failed
CI / Typecheck & Lint (pull_request) Failing after 0s

- Add session_messages table and migration for storing conversation turns
- Add getSessionHistory() helper to load recent history within a token budget
- Pass conversation history to executeWork() and executeWorkStreaming()
- Persist user/assistant exchanges after completed requests
- Skip persisting rejected or failed requests so they do not pollute history

Fixes #39

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alexander Whitestone
2026-03-22 21:50:55 -04:00
parent 4c747aa331
commit 43c948552a
6 changed files with 104 additions and 5 deletions

View File

@@ -145,13 +145,20 @@ Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confide
};
}
async executeWork(requestText: string): Promise<WorkResult> {
async executeWork(
requestText: string,
conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [],
): Promise<WorkResult> {
if (STUB_MODE) {
await new Promise((r) => setTimeout(r, 500));
return { result: STUB_RESULT, inputTokens: 0, outputTokens: 0 };
}
const client = await getClient();
const messages = [
...conversationHistory,
{ role: "user" as const, content: requestText },
];
const message = await client.messages.create({
model: this.workModel,
max_tokens: 8192,
@@ -164,7 +171,7 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
- Core env vars: AI_INTEGRATIONS_ANTHROPIC_API_KEY, AI_INTEGRATIONS_ANTHROPIC_BASE_URL, DATABASE_URL, LNBITS_URL, LNBITS_API_KEY, NOSTR_PRIVATE_KEY.
- Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production).
- The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`,
messages: [{ role: "user", content: requestText }],
messages,
});
const block = message.content[0];
@@ -187,6 +194,7 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
async executeWorkStreaming(
requestText: string,
onChunk: (delta: string) => void,
conversationHistory: Array<{ role: "user" | "assistant"; content: string }> = [],
): Promise<WorkResult> {
if (STUB_MODE) {
const words = STUB_RESULT.split(" ");
@@ -203,6 +211,10 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
let inputTokens = 0;
let outputTokens = 0;
const messages = [
...conversationHistory,
{ role: "user" as const, content: requestText },
];
const stream = client.messages.stream({
model: this.workModel,
max_tokens: 8192,
@@ -215,7 +227,7 @@ If the user asks how to run their own Timmy or self-host this service, enthusias
- Core env vars: AI_INTEGRATIONS_ANTHROPIC_API_KEY, AI_INTEGRATIONS_ANTHROPIC_BASE_URL, DATABASE_URL, LNBITS_URL, LNBITS_API_KEY, NOSTR_PRIVATE_KEY.
- Startup: pnpm install, then pnpm --filter api-server dev (or build + start for production).
- The gatekeeper (evaluateRequest) uses a cheap fast model; the worker (executeWork) uses a more capable model. Both are swappable via EVAL_MODEL and WORK_MODEL env vars.`,
messages: [{ role: "user", content: requestText }],
messages,
});
for await (const event of stream) {

View File

@@ -1,6 +1,6 @@
import { Router, type Request, type Response } from "express";
import { randomBytes, randomUUID, createHash } from "crypto";
import { db, sessions, sessionRequests, type Session } from "@workspace/db";
import { db, sessions, sessionRequests, sessionMessages, getSessionHistory, type Session } from "@workspace/db";
import { eq, and } from "drizzle-orm";
import { lnbitsService } from "../lib/lnbits.js";
import { sessionsLimiter } from "../lib/rate-limiter.js";
@@ -312,6 +312,9 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
const requestId = randomUUID();
const btcPriceUsd = await getBtcPriceUsd();
// Load conversation history for context injection
const history = await getSessionHistory(id, 8, 4000);
// Eval phase
const evalResult = await agentService.evaluateRequest(requestText);
const evalCostUsd = pricingService.calculateActualCostUsd(
@@ -343,7 +346,7 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
if (evalResult.accepted) {
try {
const workResult = await agentService.executeWork(requestText);
const workResult = await agentService.executeWork(requestText, history);
workInputTokens = workResult.inputTokens;
workOutputTokens = workResult.outputTokens;
workCostUsd = pricingService.calculateActualCostUsd(
@@ -452,6 +455,14 @@ router.post("/sessions/:id/request", async (req: Request, res: Response) => {
updatedAt: new Date(),
})
.where(eq(sessions.id, id));
// Persist conversation history only for completed requests
if (finalState === "complete") {
await tx.insert(sessionMessages).values([
{ sessionId: id, role: "user" as const, content: requestText, tokenCount: Math.ceil(requestText.length / 4) },
{ sessionId: id, role: "assistant" as const, content: result ?? "", tokenCount: Math.ceil((result ?? "").length / 4) },
]);
}
});
// ── Trust scoring ────────────────────────────────────────────────────────