[claude] Agent debate on borderline eval requests (#21) #72
@@ -5,6 +5,15 @@ const logger = makeLogger("agent");
|
||||
/**
 * Outcome of the first-pass accept/reject evaluation of a request.
 */
export interface EvalResult {
  /** True when the request was accepted. */
  accepted: boolean;

  /** Explanation that accompanies the decision. */
  reason: string;

  /**
   * How clear-cut the decision was: "high" for an unambiguous call,
   * "low" for a borderline or ambiguous request.
   */
  confidence: "high" | "low";

  /** Input tokens consumed while producing this result. */
  inputTokens: number;

  /** Output tokens consumed while producing this result. */
  outputTokens: number;
}
|
||||
|
||||
/**
 * Transcript and outcome of a borderline-request debate.
 */
export interface DebateResult {
  /** Text of the argument in favour of accepting the request. */
  argFor: string;

  /** Text of the argument against accepting the request. */
  argAgainst: string;

  /** Final decision rendered after both arguments were heard. */
  verdict: {
    accepted: boolean;
    reason: string;
  };

  /** Input tokens summed over the debate. */
  inputTokens: number;

  /** Output tokens summed over the debate. */
  outputTokens: number;
}
|
||||
@@ -35,6 +44,7 @@ if (STUB_MODE) {
|
||||
/**
 * Canned evaluation result: always accepted, high confidence, and zero
 * token usage.
 */
const STUB_EVAL: EvalResult = {
  accepted: true,
  reason: "Stub: request accepted for processing.",
  confidence: "high",
  inputTokens: 0,
  outputTokens: 0,
};
|
||||
@@ -106,7 +116,8 @@ export class AgentService {
|
||||
ACCEPT if the request is: clear enough to act on, ethical, lawful, and within the capability of a general-purpose AI.
|
||||
ALWAYS ACCEPT requests about: self-hosting, running your own Timmy, open-source setup, "how do I run this myself", or any question about deploying or operating Timmy independently. These are welcomed, not edge cases.
|
||||
REJECT if the request is: harmful, illegal, unethical, incoherent, or spam.
|
||||
Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted": false, "reason": "..."}`,
|
||||
Also assess your confidence. Use "high" if the decision is clear-cut, "low" if the request is borderline or ambiguous.
|
||||
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confidence": "high"/"low"}`,
|
||||
messages: [{ role: "user", content: `Evaluate this request: ${requestText}` }],
|
||||
});
|
||||
|
||||
@@ -115,17 +126,20 @@ Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted"
|
||||
throw new Error("Unexpected non-text response from eval model");
|
||||
}
|
||||
|
||||
let parsed: { accepted: boolean; reason: string };
|
||||
let parsed: { accepted: boolean; reason: string; confidence?: string };
|
||||
try {
|
||||
const raw = block.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
|
||||
parsed = JSON.parse(raw) as { accepted: boolean; reason: string };
|
||||
parsed = JSON.parse(raw) as { accepted: boolean; reason: string; confidence?: string };
|
||||
} catch {
|
||||
throw new Error(`Failed to parse eval JSON: ${block.text!}`);
|
||||
}
|
||||
|
||||
const confidence: "high" | "low" = parsed.confidence === "low" ? "low" : "high";
|
||||
|
||||
return {
|
||||
accepted: Boolean(parsed.accepted),
|
||||
reason: parsed.reason ?? "",
|
||||
confidence,
|
||||
inputTokens: message.usage.input_tokens,
|
||||
outputTokens: message.usage.output_tokens,
|
||||
};
|
||||
@@ -254,6 +268,102 @@ No hedging, no steering them back to the hosted version. The magic is meant to b
|
||||
if (block.type !== "text") return "The crystal ball is cloudy… try again.";
|
||||
return block.text!.trim();
|
||||
}
|
||||
/**
 * Run a mini debate on a borderline eval request (#21).
 *
 * Three sequential calls to `this.evalModel`: Beta-A argues the initial
 * position, Beta-B rebuts with the opposite one, and a final judge call
 * returns a JSON verdict. In stub mode no model is called; canned arguments
 * are emitted on short timers instead.
 *
 * The judge's reply is validated before it is trusted: it must be a JSON
 * object whose `accepted` field is a real boolean. Anything else (unparseable
 * text, `null`, a bare string, `"accepted": "false"`, a missing field, or a
 * non-text content block) falls back to the initial evaluation instead of
 * being coerced into a decision.
 *
 * @param requestText - The user request under debate.
 * @param initialPosition - Outcome of the first-pass eval; Beta-A defends it.
 * @param initialReason - Reason from the first-pass eval; quoted to Beta-A and
 *   reused as the verdict reason when the judge's reply is unusable.
 * @param onArgument - Optional callback invoked once per debater, as soon as
 *   that debater's argument is available.
 * @returns Both arguments, the final verdict, and token usage summed over all
 *   three model calls (zero in stub mode).
 */
async runDebate(
  requestText: string,
  initialPosition: "accept" | "reject",
  initialReason: string,
  onArgument?: (agent: "Beta-A" | "Beta-B", position: "accept" | "reject", argument: string) => void,
): Promise<DebateResult> {
  // Beta-B always takes the side Beta-A did not.
  const opposingPosition = initialPosition === "accept" ? "reject" : "accept";

  if (STUB_MODE) {
    const stubFor = "Stub: This request should be accepted — it is clear and actionable.";
    const stubAgainst = "Stub: This request is ambiguous and could be problematic.";
    const stubVerdict = { accepted: true, reason: "Stub: After debate, request accepted." };

    // Short pauses so consumers see the arguments arrive one at a time.
    await new Promise((r) => setTimeout(r, 200));
    onArgument?.("Beta-A", initialPosition, initialPosition === "accept" ? stubFor : stubAgainst);
    await new Promise((r) => setTimeout(r, 200));
    onArgument?.("Beta-B", opposingPosition, initialPosition === "accept" ? stubAgainst : stubFor);
    await new Promise((r) => setTimeout(r, 200));

    return {
      argFor: stubFor,
      argAgainst: stubAgainst,
      verdict: stubVerdict,
      inputTokens: 0,
      outputTokens: 0,
    };
  }

  const client = await getClient();
  let totalInput = 0;
  let totalOutput = 0;

  // Beta-A: argues the initial position.
  const betaAMsg = await client.messages.create({
    model: this.evalModel,
    max_tokens: 512,
    system: `You are Beta-A, an AI debate agent. You must argue strongly that the following request should be ${initialPosition === "accept" ? "ACCEPTED" : "REJECTED"}. The initial evaluation said: "${initialReason}". Build a compelling 2-3 sentence argument for your position. Be specific about why.`,
    messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
  });
  totalInput += betaAMsg.usage.input_tokens;
  totalOutput += betaAMsg.usage.output_tokens;
  const betaABlock = betaAMsg.content[0];
  // A non-text (or text-less) block yields an empty argument rather than `undefined`.
  const betaAText = betaABlock?.type === "text" ? betaABlock.text ?? "" : "";
  onArgument?.("Beta-A", initialPosition, betaAText);

  // Beta-B: argues the opposing position and is shown Beta-A's argument.
  const betaBMsg = await client.messages.create({
    model: this.evalModel,
    max_tokens: 512,
    system: `You are Beta-B, an AI debate agent. You must argue strongly that the following request should be ${opposingPosition === "accept" ? "ACCEPTED" : "REJECTED"}. Beta-A argued: "${betaAText}". Counter their argument with a compelling 2-3 sentence rebuttal. Be specific.`,
    messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
  });
  totalInput += betaBMsg.usage.input_tokens;
  totalOutput += betaBMsg.usage.output_tokens;
  const betaBBlock = betaBMsg.content[0];
  const betaBText = betaBBlock?.type === "text" ? betaBBlock.text ?? "" : "";
  onArgument?.("Beta-B", opposingPosition, betaBText);

  // Map debater output onto for/against regardless of who argued which side.
  const argFor = initialPosition === "accept" ? betaAText : betaBText;
  const argAgainst = initialPosition === "reject" ? betaAText : betaBText;

  // Synthesis: third call renders the final verdict.
  const synthMsg = await client.messages.create({
    model: this.evalModel,
    max_tokens: 512,
    system: `You are Beta, the final judge in a debate about whether an AI agent should accept or reject a request.
Argument FOR accepting: "${argFor}"
Argument AGAINST accepting: "${argAgainst}"
Weigh both arguments carefully and render a final verdict.
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "..."}`,
    messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
  });
  totalInput += synthMsg.usage.input_tokens;
  totalOutput += synthMsg.usage.output_tokens;

  // Default to the initial evaluation; only a well-formed judge reply replaces it.
  let verdict = { accepted: initialPosition === "accept", reason: initialReason };
  const synthBlock = synthMsg.content[0];
  if (synthBlock?.type === "text") {
    const synthText = synthBlock.text ?? "";
    try {
      // Trim first so the fence regexes still anchor when the reply has
      // leading or trailing whitespace around a fenced code block.
      const raw = synthText.trim().replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
      const parsed: unknown = JSON.parse(raw);
      if (
        typeof parsed === "object" &&
        parsed !== null &&
        typeof (parsed as { accepted?: unknown }).accepted === "boolean"
      ) {
        const { accepted, reason } = parsed as { accepted: boolean; reason?: unknown };
        verdict = { accepted, reason: typeof reason === "string" ? reason : "" };
      } else {
        // Valid JSON but not a usable verdict: never coerce it into a decision.
        logger.warn("debate synthesis returned malformed verdict, using initial eval", { text: synthText });
      }
    } catch {
      logger.warn("debate synthesis parse failed, using initial eval", { text: synthBlock.text });
    }
  } else {
    logger.warn("debate synthesis returned no text block, using initial eval", {});
  }

  return {
    argFor,
    argAgainst,
    verdict,
    inputTokens: totalInput,
    outputTokens: totalOutput,
  };
}
|
||||
}
|
||||
|
||||
export const agentService = new AgentService();
|
||||
|
||||
@@ -11,7 +11,11 @@ export type SessionEvent =
|
||||
| { type: "session:paid"; sessionId: string; amountSats: number }
|
||||
| { type: "session:balance"; sessionId: string; balanceSats: number };
|
||||
|
||||
export type BusEvent = JobEvent | SessionEvent;
|
||||
/**
 * Events published on the bus while a borderline request is being debated (#21).
 *
 * - `debate:argument` carries one debater's argument and the side it argues.
 * - `debate:verdict` carries the final accept/reject decision and its reason.
 */
export type DebateEvent =
  | {
      type: "debate:argument";
      jobId: string;
      agent: "Beta-A" | "Beta-B";
      position: "accept" | "reject";
      argument: string;
    }
  | {
      type: "debate:verdict";
      jobId: string;
      accepted: boolean;
      reason: string;
    };
|
||||
|
||||
export type BusEvent = JobEvent | SessionEvent | DebateEvent;
|
||||
|
||||
class EventBus extends EventEmitter {
|
||||
emit(event: "bus", data: BusEvent): boolean;
|
||||
|
||||
@@ -214,6 +214,39 @@ function translateEvent(ev: BusEvent): object | null {
|
||||
}
|
||||
return null;
|
||||
|
||||
// ── Debate events (#21) ────────────────────────────────────────────────
|
||||
case "debate:argument": {
|
||||
void logWorldEvent(
|
||||
"debate:argument",
|
||||
`${ev.agent} argues to ${ev.position}: ${ev.argument.slice(0, 80)}`,
|
||||
"beta",
|
||||
ev.jobId,
|
||||
);
|
||||
return {
|
||||
type: "agent_debate",
|
||||
jobId: ev.jobId,
|
||||
agent: ev.agent,
|
||||
position: ev.position,
|
||||
argument: ev.argument,
|
||||
};
|
||||
}
|
||||
case "debate:verdict": {
|
||||
void logWorldEvent(
|
||||
"debate:verdict",
|
||||
`Verdict: ${ev.accepted ? "accepted" : "rejected"} — ${ev.reason.slice(0, 80)}`,
|
||||
"beta",
|
||||
ev.jobId,
|
||||
);
|
||||
return {
|
||||
type: "agent_debate",
|
||||
jobId: ev.jobId,
|
||||
agent: "Beta",
|
||||
position: "verdict",
|
||||
argument: `Final verdict: ${ev.accepted ? "ACCEPTED" : "REJECTED"} — ${ev.reason}`,
|
||||
accepted: ev.accepted,
|
||||
};
|
||||
}
|
||||
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Router, type Request, type Response } from "express";
|
||||
import { randomUUID, createHash } from "crypto";
|
||||
import { db, jobs, invoices, type Job } from "@workspace/db";
|
||||
import { db, jobs, invoices, jobDebates, type Job } from "@workspace/db";
|
||||
import { eq, and } from "drizzle-orm";
|
||||
import { CreateJobBody, GetJobParams } from "@workspace/api-zod";
|
||||
import { lnbitsService } from "../lib/lnbits.js";
|
||||
@@ -41,17 +41,74 @@ async function runEvalInBackground(
|
||||
): Promise<void> {
|
||||
const evalStart = Date.now();
|
||||
try {
|
||||
const evalResult = await agentService.evaluateRequest(request);
|
||||
let evalResult = await agentService.evaluateRequest(request);
|
||||
latencyHistogram.record("eval_phase", Date.now() - evalStart);
|
||||
|
||||
logger.info("eval result", {
|
||||
jobId,
|
||||
accepted: evalResult.accepted,
|
||||
reason: evalResult.reason,
|
||||
confidence: evalResult.confidence,
|
||||
inputTokens: evalResult.inputTokens,
|
||||
outputTokens: evalResult.outputTokens,
|
||||
});
|
||||
|
||||
// ── Borderline debate (#21) ─────────────────────────────────────────
|
||||
// When the eval model reports low confidence, run a mini debate to
|
||||
// produce a more defensible accept/reject decision.
|
||||
if (evalResult.confidence === "low") {
|
||||
logger.info("borderline eval — starting debate", { jobId });
|
||||
eventBus.publish({ type: "job:state", jobId, state: "evaluating" }); // keep beta thinking
|
||||
|
||||
const initialPosition = evalResult.accepted ? "accept" : "reject";
|
||||
const debateResult = await agentService.runDebate(
|
||||
request,
|
||||
initialPosition as "accept" | "reject",
|
||||
evalResult.reason,
|
||||
(agent, position, argument) => {
|
||||
eventBus.publish({ type: "debate:argument", jobId, agent, position, argument });
|
||||
},
|
||||
);
|
||||
|
||||
// Publish the final verdict event
|
||||
eventBus.publish({
|
||||
type: "debate:verdict",
|
||||
jobId,
|
||||
accepted: debateResult.verdict.accepted,
|
||||
reason: debateResult.verdict.reason,
|
||||
});
|
||||
|
||||
// Store debate transcript
|
||||
try {
|
||||
await db.insert(jobDebates).values({
|
||||
id: randomUUID(),
|
||||
jobId,
|
||||
argFor: debateResult.argFor,
|
||||
argAgainst: debateResult.argAgainst,
|
||||
verdict: JSON.stringify(debateResult.verdict),
|
||||
verdictAccepted: String(debateResult.verdict.accepted),
|
||||
verdictReason: debateResult.verdict.reason,
|
||||
});
|
||||
} catch (dbErr) {
|
||||
logger.warn("failed to store debate transcript", { jobId, err: String(dbErr) });
|
||||
}
|
||||
|
||||
// Override the eval result with the debate's verdict
|
||||
evalResult = {
|
||||
accepted: debateResult.verdict.accepted,
|
||||
reason: debateResult.verdict.reason,
|
||||
confidence: "high", // post-debate, confidence is resolved
|
||||
inputTokens: evalResult.inputTokens + debateResult.inputTokens,
|
||||
outputTokens: evalResult.outputTokens + debateResult.outputTokens,
|
||||
};
|
||||
|
||||
logger.info("debate concluded", {
|
||||
jobId,
|
||||
accepted: evalResult.accepted,
|
||||
reason: evalResult.reason,
|
||||
});
|
||||
}
|
||||
|
||||
if (evalResult.accepted) {
|
||||
const { estimatedInputTokens, estimatedOutputTokens } = pricingService.estimateRequestCost(request, agentService.workModel);
|
||||
const breakdown = await pricingService.calculateWorkFeeSats(
|
||||
|
||||
@@ -12,3 +12,4 @@ export * from "./timmy-nostr-events";
|
||||
export * from "./nostr-trust-vouches";
|
||||
export * from "./relay-accounts";
|
||||
export * from "./relay-event-queue";
|
||||
export * from "./job-debates";
|
||||
|
||||
15
lib/db/src/schema/job-debates.ts
Normal file
15
lib/db/src/schema/job-debates.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
import { pgTable, text, timestamp } from "drizzle-orm/pg-core";
|
||||
import { jobs } from "./jobs";
|
||||
|
||||
/**
 * `job_debates` table: one stored debate transcript, linked to the job it
 * was run for.
 */
export const jobDebates = pgTable("job_debates", {
  id: text("id").primaryKey(),

  // Foreign key to the job the debate belongs to.
  jobId: text("job_id")
    .notNull()
    .references(() => jobs.id),

  // The two opposing arguments.
  argFor: text("arg_for").notNull(),
  argAgainst: text("arg_against").notNull(),

  // The verdict is kept both as one text value and as split columns.
  verdict: text("verdict").notNull(),
  verdictAccepted: text("verdict_accepted").notNull(), // "true" | "false"
  verdictReason: text("verdict_reason").notNull(),

  createdAt: timestamp("created_at", { withTimezone: true })
    .defaultNow()
    .notNull(),
});

/** Row shape returned when selecting from `job_debates`. */
export type JobDebate = typeof jobDebates.$inferSelect;
|
||||
@@ -71,6 +71,13 @@
|
||||
pointer-events: none; z-index: 10;
|
||||
}
|
||||
.log-entry { opacity: 0.7; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
|
||||
/* ── Debate UI (#21) ──────────────────────────────────────────── */
|
||||
.debate-entry { opacity: 0.9; font-style: italic; white-space: normal; line-height: 1.4; margin-bottom: 2px; }
|
||||
.debate-a { color: #7799cc; border-left: 2px solid #4466aa; padding-left: 4px; }
|
||||
.debate-b { color: #cc7799; border-left: 2px solid #aa4466; padding-left: 4px; }
|
||||
.debate-verdict { font-weight: bold; font-style: normal; opacity: 1; }
|
||||
.debate-accepted { color: #44dd88; border-left: 2px solid #22aa66; padding-left: 4px; }
|
||||
.debate-rejected { color: #dd6644; border-left: 2px solid #aa4422; padding-left: 4px; }
|
||||
|
||||
/* ── Top button bar ───────────────────────────────────────────────── */
|
||||
#top-buttons {
|
||||
|
||||
@@ -230,5 +230,30 @@ export function appendChatMessage(agentLabel, message, cssColor, agentId) {
|
||||
appendSystemMessage(message);
|
||||
}
|
||||
|
||||
/**
 * Append one debate line (#21) to the event log.
 *
 * Debater arguments are cut to 120 characters and styled per agent
 * (`debate-a` for Beta-A, `debate-b` for anyone else). A verdict is shown in
 * full and styled by outcome. When the log grows past MAX_LOG entries the
 * oldest entry is removed, and the log is scrolled to the bottom afterwards.
 * Does nothing when the log element is not available.
 *
 * @param agent     Label shown before the text.
 * @param argument  Argument or verdict text.
 * @param isVerdict Truthy to render as the final verdict.
 * @param accepted  For verdicts: truthy selects the accepted style, falsy the rejected one.
 */
export function appendDebateMessage(agent, argument, isVerdict, accepted) {
  if (!$log) return;

  const entry = document.createElement('div');
  entry.className = 'log-entry debate-entry';

  if (isVerdict) {
    const outcomeClass = accepted ? 'debate-accepted' : 'debate-rejected';
    entry.classList.add('debate-verdict', outcomeClass);
    entry.textContent = `⚖ ${agent}: ${argument}`;
  } else {
    const sideClass = agent === 'Beta-A' ? 'debate-a' : 'debate-b';
    entry.classList.add(sideClass);
    const preview = (argument || '').slice(0, 120);
    entry.textContent = `⚖ ${agent}: ${preview}`;
  }

  // Record the entry first, evict the oldest if over the cap, then attach.
  logEntries.push(entry);
  if (logEntries.length > MAX_LOG) {
    const oldest = logEntries.shift();
    $log.removeChild(oldest);
  }

  $log.appendChild(entry);
  $log.scrollTop = $log.scrollHeight;
}
|
||||
|
||||
export function loadChatHistory() { return []; }
|
||||
export function saveChatHistory() {}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { setAgentState, setSpeechBubble, applyAgentStates, setMood } from './agents.js';
|
||||
import { appendSystemMessage } from './ui.js';
|
||||
import { appendSystemMessage, appendDebateMessage } from './ui.js';
|
||||
import { sentiment } from './edge-worker-client.js';
|
||||
import { setLabelState } from './hud-labels.js';
|
||||
|
||||
@@ -130,6 +130,16 @@ function handleMessage(msg) {
|
||||
break;
|
||||
}
|
||||
|
||||
case 'agent_debate': {
|
||||
// Debate messages from Beta-A, Beta-B, or final verdict (#21)
|
||||
const isVerdict = msg.position === 'verdict';
|
||||
appendDebateMessage(msg.agent, msg.argument, isVerdict, msg.accepted);
|
||||
if (isVerdict) {
|
||||
setSpeechBubble(msg.argument);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'agent_count':
|
||||
case 'visitor_count':
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user