[claude] Agent debate on borderline eval requests (#21) (#72)

This commit was merged in pull request #72.
This commit is contained in:
2026-03-23 01:07:52 +00:00
parent 5954a2fdc0
commit 609acc8f66
9 changed files with 269 additions and 7 deletions

View File

@@ -5,6 +5,15 @@ const logger = makeLogger("agent");
// Outcome of the single-shot request evaluation.
export interface EvalResult {
accepted: boolean;
reason: string;
// "low" marks a borderline decision and triggers the debate flow (#21).
confidence: "high" | "low";
inputTokens: number;
outputTokens: number;
}
// Outcome of a mini debate on a borderline request (#21):
// both arguments plus the judge's final verdict.
export interface DebateResult {
argFor: string;
argAgainst: string;
verdict: { accepted: boolean; reason: string };
// Token usage summed across all debate model calls.
inputTokens: number;
outputTokens: number;
}
@@ -35,6 +44,7 @@ if (STUB_MODE) {
// Canned eval result returned when STUB_MODE is on (no model calls, zero cost).
const STUB_EVAL: EvalResult = {
accepted: true,
reason: "Stub: request accepted for processing.",
confidence: "high",
inputTokens: 0,
outputTokens: 0,
};
@@ -106,7 +116,8 @@ export class AgentService {
ACCEPT if the request is: clear enough to act on, ethical, lawful, and within the capability of a general-purpose AI.
ALWAYS ACCEPT requests about: self-hosting, running your own Timmy, open-source setup, "how do I run this myself", or any question about deploying or operating Timmy independently. These are welcomed, not edge cases.
REJECT if the request is: harmful, illegal, unethical, incoherent, or spam.
Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted": false, "reason": "..."}`,
Also assess your confidence. Use "high" if the decision is clear-cut, "low" if the request is borderline or ambiguous.
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confidence": "high"/"low"}`,
messages: [{ role: "user", content: `Evaluate this request: ${requestText}` }],
});
@@ -115,17 +126,20 @@ Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted"
throw new Error("Unexpected non-text response from eval model");
}
let parsed: { accepted: boolean; reason: string };
let parsed: { accepted: boolean; reason: string; confidence?: string };
try {
const raw = block.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
parsed = JSON.parse(raw) as { accepted: boolean; reason: string };
parsed = JSON.parse(raw) as { accepted: boolean; reason: string; confidence?: string };
} catch {
throw new Error(`Failed to parse eval JSON: ${block.text!}`);
}
const confidence: "high" | "low" = parsed.confidence === "low" ? "low" : "high";
return {
accepted: Boolean(parsed.accepted),
reason: parsed.reason ?? "",
confidence,
inputTokens: message.usage.input_tokens,
outputTokens: message.usage.output_tokens,
};
@@ -254,6 +268,102 @@ No hedging, no steering them back to the hosted version. The magic is meant to b
if (block.type !== "text") return "The crystal ball is cloudy… try again.";
return block.text!.trim();
}
/**
 * Run a mini debate on a borderline eval request (#21).
 *
 * Three sequential calls on the eval model:
 *   1. Beta-A argues the initial eval's position.
 *   2. Beta-B argues the opposite and rebuts Beta-A's text.
 *   3. A final judge ("Beta") weighs both arguments and returns a JSON verdict.
 *
 * @param requestText     The request text under debate.
 * @param initialPosition Position taken by the initial eval ("accept" | "reject").
 * @param initialReason   Reason from the initial eval; shown to Beta-A and used
 *                        as the fallback verdict when synthesis parsing fails.
 * @param onArgument      Optional callback fired as each debater finishes, so
 *                        callers can stream arguments (e.g. over the event bus).
 * @returns Debate transcript (argFor/argAgainst), final verdict, and token
 *          usage summed over all three calls.
 */
async runDebate(
requestText: string,
initialPosition: "accept" | "reject",
initialReason: string,
onArgument?: (agent: "Beta-A" | "Beta-B", position: "accept" | "reject", argument: string) => void,
): Promise<DebateResult> {
if (STUB_MODE) {
// Deterministic stub path: mimic the real call cadence with short delays
// so downstream consumers still see a staged, two-argument debate.
const stubFor = "Stub: This request should be accepted — it is clear and actionable.";
const stubAgainst = "Stub: This request is ambiguous and could be problematic.";
const stubVerdict = { accepted: true, reason: "Stub: After debate, request accepted." };
await new Promise((r) => setTimeout(r, 200));
onArgument?.("Beta-A", initialPosition, initialPosition === "accept" ? stubFor : stubAgainst);
await new Promise((r) => setTimeout(r, 200));
const opposingPosition = initialPosition === "accept" ? "reject" : "accept";
onArgument?.("Beta-B", opposingPosition, initialPosition === "accept" ? stubAgainst : stubFor);
await new Promise((r) => setTimeout(r, 200));
return {
argFor: stubFor,
argAgainst: stubAgainst,
verdict: stubVerdict,
inputTokens: 0,
outputTokens: 0,
};
}
const client = await getClient();
// Token usage is accumulated across all three calls for billing/metrics.
let totalInput = 0;
let totalOutput = 0;
// Beta-A: argues the initial position
const betaAPosition = initialPosition;
const betaAMsg = await client.messages.create({
model: this.evalModel,
max_tokens: 512,
system: `You are Beta-A, an AI debate agent. You must argue strongly that the following request should be ${betaAPosition === "accept" ? "ACCEPTED" : "REJECTED"}. The initial evaluation said: "${initialReason}". Build a compelling 2-3 sentence argument for your position. Be specific about why.`,
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
});
totalInput += betaAMsg.usage.input_tokens;
totalOutput += betaAMsg.usage.output_tokens;
// A non-text first block degrades to an empty argument rather than throwing.
const betaAText = betaAMsg.content[0]?.type === "text" ? betaAMsg.content[0].text! : "";
onArgument?.("Beta-A", betaAPosition, betaAText);
// Beta-B: argues the opposing position
const betaBPosition = initialPosition === "accept" ? "reject" : "accept";
const betaBMsg = await client.messages.create({
model: this.evalModel,
max_tokens: 512,
system: `You are Beta-B, an AI debate agent. You must argue strongly that the following request should be ${betaBPosition === "accept" ? "ACCEPTED" : "REJECTED"}. Beta-A argued: "${betaAText}". Counter their argument with a compelling 2-3 sentence rebuttal. Be specific.`,
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
});
totalInput += betaBMsg.usage.input_tokens;
totalOutput += betaBMsg.usage.output_tokens;
const betaBText = betaBMsg.content[0]?.type === "text" ? betaBMsg.content[0].text! : "";
onArgument?.("Beta-B", betaBPosition, betaBText);
// Normalize by stance so the synthesis prompt always sees FOR then AGAINST,
// regardless of which agent argued which side.
const argFor = betaAPosition === "accept" ? betaAText : betaBText;
const argAgainst = betaAPosition === "reject" ? betaAText : betaBText;
// Synthesis: third call renders the final verdict
const synthMsg = await client.messages.create({
model: this.evalModel,
max_tokens: 512,
system: `You are Beta, the final judge in a debate about whether an AI agent should accept or reject a request.
Argument FOR accepting: "${argFor}"
Argument AGAINST accepting: "${argAgainst}"
Weigh both arguments carefully and render a final verdict.
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "..."}`,
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
});
totalInput += synthMsg.usage.input_tokens;
totalOutput += synthMsg.usage.output_tokens;
const synthBlock = synthMsg.content[0];
// Fall back to the initial eval's decision if the judge's JSON is unparsable.
let verdict = { accepted: initialPosition === "accept", reason: initialReason };
if (synthBlock?.type === "text") {
try {
// Strip optional markdown code fences before parsing.
const raw = synthBlock.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
verdict = JSON.parse(raw) as { accepted: boolean; reason: string };
} catch {
logger.warn("debate synthesis parse failed, using initial eval", { text: synthBlock.text });
}
}
return {
argFor,
argAgainst,
verdict: { accepted: Boolean(verdict.accepted), reason: verdict.reason ?? "" },
inputTokens: totalInput,
outputTokens: totalOutput,
};
}
}
export const agentService = new AgentService();

View File

@@ -11,7 +11,11 @@ export type SessionEvent =
| { type: "session:paid"; sessionId: string; amountSats: number }
| { type: "session:balance"; sessionId: string; balanceSats: number };
export type BusEvent = JobEvent | SessionEvent;
// Events emitted while a borderline eval request is being debated (#21).
export type DebateEvent =
| { type: "debate:argument"; jobId: string; agent: "Beta-A" | "Beta-B"; position: "accept" | "reject"; argument: string }
| { type: "debate:verdict"; jobId: string; accepted: boolean; reason: string };
export type BusEvent = JobEvent | SessionEvent | DebateEvent;
class EventBus extends EventEmitter {
emit(event: "bus", data: BusEvent): boolean;

View File

@@ -214,6 +214,39 @@ function translateEvent(ev: BusEvent): object | null {
}
return null;
// ── Debate events (#21) ────────────────────────────────────────────────
case "debate:argument": {
  // Persist a truncated transcript line for the world log, then forward
  // the full argument to clients as an `agent_debate` frame.
  void logWorldEvent(
    "debate:argument",
    `${ev.agent} argues to ${ev.position}: ${ev.argument.slice(0, 80)}`,
    "beta",
    ev.jobId,
  );
  return {
    type: "agent_debate",
    jobId: ev.jobId,
    agent: ev.agent,
    position: ev.position,
    argument: ev.argument,
  };
}
case "debate:verdict": {
  void logWorldEvent(
    "debate:verdict",
    // Fix: separate the accepted/rejected word from the reason text
    // (previously concatenated as e.g. "Verdict: rejectedThe request…").
    `Verdict: ${ev.accepted ? "accepted" : "rejected"}: ${ev.reason.slice(0, 80)}`,
    "beta",
    ev.jobId,
  );
  return {
    type: "agent_debate",
    jobId: ev.jobId,
    agent: "Beta",
    position: "verdict",
    // Fix: same missing-separator bug in the client-facing argument string.
    argument: `Final verdict: ${ev.accepted ? "ACCEPTED" : "REJECTED"}: ${ev.reason}`,
    accepted: ev.accepted,
  };
}
default:
return null;
}

View File

@@ -1,6 +1,6 @@
import { Router, type Request, type Response } from "express";
import { randomUUID, createHash } from "crypto";
import { db, jobs, invoices, type Job } from "@workspace/db";
import { db, jobs, invoices, jobDebates, type Job } from "@workspace/db";
import { eq, and } from "drizzle-orm";
import { CreateJobBody, GetJobParams } from "@workspace/api-zod";
import { lnbitsService } from "../lib/lnbits.js";
@@ -41,17 +41,74 @@ async function runEvalInBackground(
): Promise<void> {
const evalStart = Date.now();
try {
const evalResult = await agentService.evaluateRequest(request);
let evalResult = await agentService.evaluateRequest(request);
latencyHistogram.record("eval_phase", Date.now() - evalStart);
logger.info("eval result", {
jobId,
accepted: evalResult.accepted,
reason: evalResult.reason,
confidence: evalResult.confidence,
inputTokens: evalResult.inputTokens,
outputTokens: evalResult.outputTokens,
});
// ── Borderline debate (#21) ─────────────────────────────────────────
// When the eval model reports low confidence, run a mini debate to
// produce a more defensible accept/reject decision.
if (evalResult.confidence === "low") {
logger.info("borderline eval — starting debate", { jobId });
eventBus.publish({ type: "job:state", jobId, state: "evaluating" }); // keep beta thinking
const initialPosition = evalResult.accepted ? "accept" : "reject";
const debateResult = await agentService.runDebate(
request,
initialPosition as "accept" | "reject",
evalResult.reason,
(agent, position, argument) => {
eventBus.publish({ type: "debate:argument", jobId, agent, position, argument });
},
);
// Publish the final verdict event
eventBus.publish({
type: "debate:verdict",
jobId,
accepted: debateResult.verdict.accepted,
reason: debateResult.verdict.reason,
});
// Store debate transcript
try {
await db.insert(jobDebates).values({
id: randomUUID(),
jobId,
argFor: debateResult.argFor,
argAgainst: debateResult.argAgainst,
verdict: JSON.stringify(debateResult.verdict),
verdictAccepted: String(debateResult.verdict.accepted),
verdictReason: debateResult.verdict.reason,
});
} catch (dbErr) {
logger.warn("failed to store debate transcript", { jobId, err: String(dbErr) });
}
// Override the eval result with the debate's verdict
evalResult = {
accepted: debateResult.verdict.accepted,
reason: debateResult.verdict.reason,
confidence: "high", // post-debate, confidence is resolved
inputTokens: evalResult.inputTokens + debateResult.inputTokens,
outputTokens: evalResult.outputTokens + debateResult.outputTokens,
};
logger.info("debate concluded", {
jobId,
accepted: evalResult.accepted,
reason: evalResult.reason,
});
}
if (evalResult.accepted) {
const { estimatedInputTokens, estimatedOutputTokens } = pricingService.estimateRequestCost(request, agentService.workModel);
const breakdown = await pricingService.calculateWorkFeeSats(

View File

@@ -12,3 +12,4 @@ export * from "./timmy-nostr-events";
export * from "./nostr-trust-vouches";
export * from "./relay-accounts";
export * from "./relay-event-queue";
export * from "./job-debates";

View File

@@ -0,0 +1,15 @@
import { pgTable, text, timestamp } from "drizzle-orm/pg-core";
import { jobs } from "./jobs";
// Transcript of a mini debate run on a borderline eval request (#21).
export const jobDebates = pgTable("job_debates", {
id: text("id").primaryKey(),
// Owning job; the debate rows are looked up by job id.
jobId: text("job_id").notNull().references(() => jobs.id),
argFor: text("arg_for").notNull(), // argument in favor of accepting
argAgainst: text("arg_against").notNull(), // argument against accepting
// Raw verdict JSON as produced by the synthesis call.
verdict: text("verdict").notNull(),
// NOTE(review): boolean stored as text — a `boolean` column would be more
// precise, but changing the type now would require a migration.
verdictAccepted: text("verdict_accepted").notNull(), // "true" | "false"
verdictReason: text("verdict_reason").notNull(),
createdAt: timestamp("created_at", { withTimezone: true }).defaultNow().notNull(),
});
export type JobDebate = typeof jobDebates.$inferSelect;

View File

@@ -71,6 +71,13 @@
pointer-events: none; z-index: 10;
}
.log-entry { opacity: 0.7; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
/* ── Debate UI (#21) ──────────────────────────────────────────── */
.debate-entry { opacity: 0.9; font-style: italic; white-space: normal; line-height: 1.4; margin-bottom: 2px; }
.debate-a { color: #7799cc; border-left: 2px solid #4466aa; padding-left: 4px; }
.debate-b { color: #cc7799; border-left: 2px solid #aa4466; padding-left: 4px; }
.debate-verdict { font-weight: bold; font-style: normal; opacity: 1; }
.debate-accepted { color: #44dd88; border-left: 2px solid #22aa66; padding-left: 4px; }
.debate-rejected { color: #dd6644; border-left: 2px solid #aa4422; padding-left: 4px; }
/* ── Top button bar ───────────────────────────────────────────────── */
#top-buttons {

View File

@@ -230,5 +230,30 @@ export function appendChatMessage(agentLabel, message, cssColor, agentId) {
appendSystemMessage(message);
}
/**
 * Append a debate line (argument or final verdict) to the event log (#21).
 * Arguments are truncated to 120 chars and tinted per debater; verdicts are
 * rendered bold and colored by outcome. The log is capped at MAX_LOG rows.
 */
export function appendDebateMessage(agent, argument, isVerdict, accepted) {
  if (!$log) return;
  const row = document.createElement('div');
  const classes = ['log-entry', 'debate-entry'];
  if (isVerdict) {
    classes.push('debate-verdict', accepted ? 'debate-accepted' : 'debate-rejected');
    row.textContent = `${agent}: ${argument}`;
  } else {
    classes.push(agent === 'Beta-A' ? 'debate-a' : 'debate-b');
    row.textContent = `${agent}: ${(argument || '').slice(0, 120)}`;
  }
  row.className = classes.join(' ');
  logEntries.push(row);
  if (logEntries.length > MAX_LOG) {
    // Evict the oldest entry from both the tracking array and the DOM.
    $log.removeChild(logEntries.shift());
  }
  $log.appendChild(row);
  $log.scrollTop = $log.scrollHeight;
}
export function loadChatHistory() { return []; }
export function saveChatHistory() {}

View File

@@ -1,5 +1,5 @@
import { setAgentState, setSpeechBubble, applyAgentStates, setMood } from './agents.js';
import { appendSystemMessage } from './ui.js';
import { appendSystemMessage, appendDebateMessage } from './ui.js';
import { sentiment } from './edge-worker-client.js';
import { setLabelState } from './hud-labels.js';
@@ -130,6 +130,16 @@ function handleMessage(msg) {
break;
}
case 'agent_debate': {
// Debate messages from Beta-A, Beta-B, or final verdict (#21)
const isVerdict = msg.position === 'verdict';
appendDebateMessage(msg.agent, msg.argument, isVerdict, msg.accepted);
if (isVerdict) {
// Only the final verdict is surfaced as a speech bubble.
setSpeechBubble(msg.argument);
}
break;
}
case 'agent_count':
case 'visitor_count':
break;