This commit was merged in pull request #72.
This commit is contained in:
@@ -5,6 +5,15 @@ const logger = makeLogger("agent");
|
||||
/**
 * Outcome of a single-model evaluation of an incoming request.
 */
export interface EvalResult {
  /** Whether the request was accepted for processing. */
  accepted: boolean;
  /** Model-provided justification for the decision. */
  reason: string;
  /** "high" when the decision is clear-cut, "low" when the request is borderline or ambiguous. */
  confidence: "high" | "low";
  /** Input tokens consumed by the eval call (0 in stub mode). */
  inputTokens: number;
  /** Output tokens produced by the eval call (0 in stub mode). */
  outputTokens: number;
}
|
||||
|
||||
/**
 * Transcript and outcome of the accept/reject mini debate (see runDebate).
 */
export interface DebateResult {
  /** Argument in favor of accepting the request. */
  argFor: string;
  /** Argument against accepting the request. */
  argAgainst: string;
  /** Final verdict rendered by the synthesis (judge) call. */
  verdict: { accepted: boolean; reason: string };
  /** Total input tokens across all debate calls (0 in stub mode). */
  inputTokens: number;
  /** Total output tokens across all debate calls (0 in stub mode). */
  outputTokens: number;
}
|
||||
@@ -35,6 +44,7 @@ if (STUB_MODE) {
|
||||
// Canned evaluation returned when STUB_MODE is on — no API call is made,
// so token counts are zero and the request is always accepted.
const STUB_EVAL: EvalResult = {
  accepted: true,
  reason: "Stub: request accepted for processing.",
  confidence: "high",
  inputTokens: 0,
  outputTokens: 0,
};
|
||||
@@ -106,7 +116,8 @@ export class AgentService {
|
||||
ACCEPT if the request is: clear enough to act on, ethical, lawful, and within the capability of a general-purpose AI.
|
||||
ALWAYS ACCEPT requests about: self-hosting, running your own Timmy, open-source setup, "how do I run this myself", or any question about deploying or operating Timmy independently. These are welcomed, not edge cases.
|
||||
REJECT if the request is: harmful, illegal, unethical, incoherent, or spam.
|
||||
Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted": false, "reason": "..."}`,
|
||||
Also assess your confidence. Use "high" if the decision is clear-cut, "low" if the request is borderline or ambiguous.
|
||||
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "...", "confidence": "high"/"low"}`,
|
||||
messages: [{ role: "user", content: `Evaluate this request: ${requestText}` }],
|
||||
});
|
||||
|
||||
@@ -115,17 +126,20 @@ Respond ONLY with valid JSON: {"accepted": true, "reason": "..."} or {"accepted"
|
||||
throw new Error("Unexpected non-text response from eval model");
|
||||
}
|
||||
|
||||
let parsed: { accepted: boolean; reason: string };
|
||||
let parsed: { accepted: boolean; reason: string; confidence?: string };
|
||||
try {
|
||||
const raw = block.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
|
||||
parsed = JSON.parse(raw) as { accepted: boolean; reason: string };
|
||||
parsed = JSON.parse(raw) as { accepted: boolean; reason: string; confidence?: string };
|
||||
} catch {
|
||||
throw new Error(`Failed to parse eval JSON: ${block.text!}`);
|
||||
}
|
||||
|
||||
const confidence: "high" | "low" = parsed.confidence === "low" ? "low" : "high";
|
||||
|
||||
return {
|
||||
accepted: Boolean(parsed.accepted),
|
||||
reason: parsed.reason ?? "",
|
||||
confidence,
|
||||
inputTokens: message.usage.input_tokens,
|
||||
outputTokens: message.usage.output_tokens,
|
||||
};
|
||||
@@ -254,6 +268,102 @@ No hedging, no steering them back to the hosted version. The magic is meant to b
|
||||
if (block.type !== "text") return "The crystal ball is cloudy… try again.";
|
||||
return block.text!.trim();
|
||||
}
|
||||
/**
|
||||
* Run a mini debate on a borderline eval request (#21).
|
||||
* Two opposing Haiku calls argue accept vs reject, then a third synthesizes.
|
||||
* Returns the debate transcript and final verdict.
|
||||
*/
|
||||
async runDebate(
|
||||
requestText: string,
|
||||
initialPosition: "accept" | "reject",
|
||||
initialReason: string,
|
||||
onArgument?: (agent: "Beta-A" | "Beta-B", position: "accept" | "reject", argument: string) => void,
|
||||
): Promise<DebateResult> {
|
||||
if (STUB_MODE) {
|
||||
const stubFor = "Stub: This request should be accepted — it is clear and actionable.";
|
||||
const stubAgainst = "Stub: This request is ambiguous and could be problematic.";
|
||||
const stubVerdict = { accepted: true, reason: "Stub: After debate, request accepted." };
|
||||
await new Promise((r) => setTimeout(r, 200));
|
||||
onArgument?.("Beta-A", initialPosition, initialPosition === "accept" ? stubFor : stubAgainst);
|
||||
await new Promise((r) => setTimeout(r, 200));
|
||||
const opposingPosition = initialPosition === "accept" ? "reject" : "accept";
|
||||
onArgument?.("Beta-B", opposingPosition, initialPosition === "accept" ? stubAgainst : stubFor);
|
||||
await new Promise((r) => setTimeout(r, 200));
|
||||
return {
|
||||
argFor: stubFor,
|
||||
argAgainst: stubAgainst,
|
||||
verdict: stubVerdict,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
};
|
||||
}
|
||||
|
||||
const client = await getClient();
|
||||
let totalInput = 0;
|
||||
let totalOutput = 0;
|
||||
|
||||
// Beta-A: argues the initial position
|
||||
const betaAPosition = initialPosition;
|
||||
const betaAMsg = await client.messages.create({
|
||||
model: this.evalModel,
|
||||
max_tokens: 512,
|
||||
system: `You are Beta-A, an AI debate agent. You must argue strongly that the following request should be ${betaAPosition === "accept" ? "ACCEPTED" : "REJECTED"}. The initial evaluation said: "${initialReason}". Build a compelling 2-3 sentence argument for your position. Be specific about why.`,
|
||||
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
|
||||
});
|
||||
totalInput += betaAMsg.usage.input_tokens;
|
||||
totalOutput += betaAMsg.usage.output_tokens;
|
||||
const betaAText = betaAMsg.content[0]?.type === "text" ? betaAMsg.content[0].text! : "";
|
||||
onArgument?.("Beta-A", betaAPosition, betaAText);
|
||||
|
||||
// Beta-B: argues the opposing position
|
||||
const betaBPosition = initialPosition === "accept" ? "reject" : "accept";
|
||||
const betaBMsg = await client.messages.create({
|
||||
model: this.evalModel,
|
||||
max_tokens: 512,
|
||||
system: `You are Beta-B, an AI debate agent. You must argue strongly that the following request should be ${betaBPosition === "accept" ? "ACCEPTED" : "REJECTED"}. Beta-A argued: "${betaAText}". Counter their argument with a compelling 2-3 sentence rebuttal. Be specific.`,
|
||||
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
|
||||
});
|
||||
totalInput += betaBMsg.usage.input_tokens;
|
||||
totalOutput += betaBMsg.usage.output_tokens;
|
||||
const betaBText = betaBMsg.content[0]?.type === "text" ? betaBMsg.content[0].text! : "";
|
||||
onArgument?.("Beta-B", betaBPosition, betaBText);
|
||||
|
||||
const argFor = betaAPosition === "accept" ? betaAText : betaBText;
|
||||
const argAgainst = betaAPosition === "reject" ? betaAText : betaBText;
|
||||
|
||||
// Synthesis: third call renders the final verdict
|
||||
const synthMsg = await client.messages.create({
|
||||
model: this.evalModel,
|
||||
max_tokens: 512,
|
||||
system: `You are Beta, the final judge in a debate about whether an AI agent should accept or reject a request.
|
||||
Argument FOR accepting: "${argFor}"
|
||||
Argument AGAINST accepting: "${argAgainst}"
|
||||
Weigh both arguments carefully and render a final verdict.
|
||||
Respond ONLY with valid JSON: {"accepted": true/false, "reason": "..."}`,
|
||||
messages: [{ role: "user", content: `Request under debate: ${requestText}` }],
|
||||
});
|
||||
totalInput += synthMsg.usage.input_tokens;
|
||||
totalOutput += synthMsg.usage.output_tokens;
|
||||
|
||||
const synthBlock = synthMsg.content[0];
|
||||
let verdict = { accepted: initialPosition === "accept", reason: initialReason };
|
||||
if (synthBlock?.type === "text") {
|
||||
try {
|
||||
const raw = synthBlock.text!.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
|
||||
verdict = JSON.parse(raw) as { accepted: boolean; reason: string };
|
||||
} catch {
|
||||
logger.warn("debate synthesis parse failed, using initial eval", { text: synthBlock.text });
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
argFor,
|
||||
argAgainst,
|
||||
verdict: { accepted: Boolean(verdict.accepted), reason: verdict.reason ?? "" },
|
||||
inputTokens: totalInput,
|
||||
outputTokens: totalOutput,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/** Module-level singleton instance of AgentService. */
export const agentService = new AgentService();
|
||||
|
||||
Reference in New Issue
Block a user