/**
 * moderation.ts — Event moderation queue + Timmy AI review
 *
 * Every Nostr event from a non-elite whitelisted account is held in
 * relay_event_queue with status "pending". Timmy (Claude haiku) reviews
 * pending events in a background poll loop and either auto_approves them
 * (injecting into strfry) or flags them for admin review.
 *
 * Elite accounts bypass this queue — their events are injected directly
 * from the relay policy handler.
 */

import { db, relayEventQueue, type QueueReviewer } from "@workspace/db";
import { eq, and, isNull } from "drizzle-orm";
import { makeLogger } from "./logger.js";
import { injectEvent } from "./strfry.js";

const logger = makeLogger("moderation");

// ── Stub mode (mirrors agent.ts) ─────────────────────────────────────────────

/** True when either Anthropic env var is missing; every event is then auto-approved. */
const STUB_MODE =
  !process.env["AI_INTEGRATIONS_ANTHROPIC_API_KEY"] ||
  !process.env["AI_INTEGRATIONS_ANTHROPIC_BASE_URL"];

if (STUB_MODE) {
  logger.warn("no Anthropic key — moderation running in STUB mode (auto-approve all)");
}

// ── Anthropic lazy client (reuse from agent.ts pattern) ──────────────────────

/** Minimal structural view of the Anthropic client: only what this module calls. */
interface AnthropicLike {
  messages: {
    create(params: Record<string, unknown>): Promise<{
      content: Array<{ type: string; text?: string }>;
      usage: { input_tokens: number; output_tokens: number };
    }>;
  };
}

let _anthropic: AnthropicLike | null = null;

/**
 * Return the shared Anthropic client, importing the integration package on
 * first use so the module is never loaded while running in stub mode.
 */
async function getClient(): Promise<AnthropicLike> {
  if (_anthropic) return _anthropic;
  // @ts-expect-error -- integrations-anthropic-ai exports src directly
  const mod = (await import("@workspace/integrations-anthropic-ai")) as { anthropic: AnthropicLike };
  _anthropic = mod.anthropic;
  return _anthropic;
}

// ── Moderation prompt ─────────────────────────────────────────────────────────

// Built by concatenation purely for readability; the resulting string value
// (including the single embedded newline before "FLAG") is unchanged.
const MODERATION_SYSTEM =
  `You are moderating events on a sovereign Nostr relay. ` +
  `Your job is to approve benign content and flag anything harmful. ` +
  `APPROVE if the event is: a standard text note, profile update, reaction, encrypted DM, ` +
  `relay list, metadata update, or other typical Nostr activity. \n` +
  `FLAG if the event is: spam, harassment, illegal content, NSFW without appropriate warnings, ` +
  `coordinated abuse, or clearly malicious. ` +
  `Respond ONLY with valid JSON: {"decision": "approve", "reason": "..."} or {"decision": "flag", "reason": "..."}`;

type ModerationDecision = "approve" | "flag";

interface ModerationResult {
  decision: ModerationDecision;
  reason: string;
}

/**
 * Turn Claude's raw text reply into a ModerationResult.
 *
 * Strips an optional Markdown code fence, parses the remainder as JSON and
 * validates its shape. Anything other than an explicit "approve" decision is
 * treated as "flag", so a malformed or unparseable reply never approves.
 */
function parseModerationReply(text: string): ModerationResult {
  const raw = text
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .trim();

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    parsed = undefined;
  }

  if (typeof parsed !== "object" || parsed === null) {
    logger.warn("moderation: failed to parse Claude response", {
      text: text.slice(0, 100),
    });
    return { decision: "flag", reason: "Failed to parse AI response" };
  }

  const { decision, reason } = parsed as { decision?: unknown; reason?: unknown };
  return {
    decision: decision === "approve" ? "approve" : "flag",
    // The reason is persisted, so only accept an actual string.
    reason: typeof reason === "string" ? reason : "",
  };
}

/**
 * Ask Claude to moderate one event.
 *
 * @param kind    Nostr event kind, included in the prompt.
 * @param content Event content; only the first 2000 characters are sent.
 * @returns The model's decision, or an auto-approve result in stub mode.
 *          Unexpected or unparseable replies yield "flag".
 * @throws Whatever the client throws when the API call itself fails.
 */
async function callClaude(kind: number, content: string): Promise<ModerationResult> {
  if (STUB_MODE) {
    return { decision: "approve", reason: "Stub: auto-approved (no Anthropic key)" };
  }

  const client = await getClient();
  const message = await client.messages.create({
    model: process.env["MODERATION_MODEL"] ?? "claude-haiku-4-5",
    max_tokens: 256,
    system: MODERATION_SYSTEM,
    messages: [
      {
        role: "user",
        content: `Nostr event kind ${kind}. Content: ${content.slice(0, 2000)}`,
      },
    ],
  });

  const block = message.content[0];
  // `text` is optional on the block type, so guard it instead of asserting.
  if (!block || block.type !== "text" || typeof block.text !== "string") {
    return { decision: "flag", reason: "AI returned unexpected response" };
  }

  return parseModerationReply(block.text);
}

/**
 * Pull the `content` field out of a stored raw event JSON string.
 * Returns "" when the JSON is invalid or `content` is missing / not a string.
 */
function extractContent(rawEvent: string): string {
  try {
    const parsed: unknown = JSON.parse(rawEvent);
    if (typeof parsed === "object" && parsed !== null) {
      const { content } = parsed as { content?: unknown };
      if (typeof content === "string") return content;
    }
  } catch {
    // Fall through: an unparseable event is reviewed with empty content.
  }
  return "";
}

// ── ModerationService ─────────────────────────────────────────────────────────

export class ModerationService {
  /**
   * Insert an event into the moderation queue with "pending" status.
   * Idempotent: if the event_id already exists, the insert is silently skipped.
   */
  async enqueue(event: {
    id: string;
    pubkey: string;
    kind: number;
    rawJson: string;
  }): Promise<void> {
    await db
      .insert(relayEventQueue)
      .values({
        eventId: event.id,
        pubkey: event.pubkey,
        kind: event.kind,
        rawEvent: event.rawJson,
        status: "pending",
      })
      .onConflictDoNothing();

    logger.info("moderation: event enqueued", {
      eventId: event.id.slice(0, 8),
      pubkey: event.pubkey.slice(0, 8),
      kind: event.kind,
    });
  }

  /**
   * Review a single pending event with Claude.
   * Returns "approve" (event is injected into strfry + status → auto_approved)
   * or "flag" (status stays pending — admin must decide).
   *
   * Also returns "flag" without touching the row when no pending row exists
   * for `eventId`, and when the Claude call itself throws.
   */
  async autoReview(eventId: string): Promise<ModerationDecision> {
    const rows = await db
      .select()
      .from(relayEventQueue)
      .where(
        and(
          eq(relayEventQueue.eventId, eventId),
          eq(relayEventQueue.status, "pending"),
        ),
      )
      .limit(1);

    const row = rows[0];
    if (!row) {
      logger.warn("moderation: autoReview called on non-pending event", { eventId });
      return "flag";
    }

    const content = extractContent(row.rawEvent);

    let result: ModerationResult;
    try {
      result = await callClaude(row.kind, content);
    } catch (err) {
      logger.error("moderation: Claude call failed — flagging for admin review", {
        eventId: eventId.slice(0, 8),
        err,
      });
      result = { decision: "flag", reason: "AI review failed — admin review required" };
    }

    if (result.decision === "approve") {
      // NOTE(review): decide() does not re-check that the row is still pending,
      // so an admin decision made during the Claude call would be overwritten.
      await this.decide(eventId, "auto_approved", result.reason, "timmy_ai");
    } else {
      // Record Timmy's reason but leave status as "pending" for the admin.
      // Restricted to still-pending rows so a decision an admin made while the
      // Claude call was in flight is not overwritten.
      await db
        .update(relayEventQueue)
        .set({ reviewReason: result.reason, reviewedBy: "timmy_ai" })
        .where(
          and(
            eq(relayEventQueue.eventId, eventId),
            eq(relayEventQueue.status, "pending"),
          ),
        );

      logger.info("moderation: event flagged for admin review", {
        eventId: eventId.slice(0, 8),
        reason: result.reason,
      });
    }

    return result.decision;
  }

  /**
   * Apply a moderation decision (approve/auto_approved/rejected).
   * On approval: inject the event into strfry.
   *
   * The status is written before injection; an injection failure is logged
   * but does not revert the stored status.
   */
  async decide(
    eventId: string,
    status: "approved" | "rejected" | "auto_approved",
    reason: string,
    reviewedBy: QueueReviewer,
  ): Promise<void> {
    await db
      .update(relayEventQueue)
      .set({
        status,
        reviewedBy,
        reviewReason: reason,
        decidedAt: new Date(),
      })
      .where(eq(relayEventQueue.eventId, eventId));

    logger.info("moderation: decision recorded", {
      eventId: eventId.slice(0, 8),
      status,
      reviewedBy,
    });

    if (status === "approved" || status === "auto_approved") {
      const rows = await db
        .select({ rawEvent: relayEventQueue.rawEvent })
        .from(relayEventQueue)
        .where(eq(relayEventQueue.eventId, eventId))
        .limit(1);

      const rawEvent = rows[0]?.rawEvent;
      if (rawEvent) {
        const result = await injectEvent(rawEvent);
        if (!result.ok) {
          logger.error("moderation: strfry inject failed after approval", {
            eventId: eventId.slice(0, 8),
            error: result.error,
          });
        }
      }
    }
  }

  /**
   * Background poll: auto-review up to `limit` pending events.
   * Called every 30 seconds from the startup poll loop.
   *
   * Only rows no reviewer has touched yet are selected. Events Timmy already
   * flagged stay "pending" for the admin, so without this filter they would
   * be re-sent to Claude on every poll and could crowd new events out of the
   * `limit` window.
   */
  async processPending(limit = 10): Promise<void> {
    const rows = await db
      .select({ eventId: relayEventQueue.eventId })
      .from(relayEventQueue)
      .where(
        and(
          eq(relayEventQueue.status, "pending"),
          // Assumes reviewedBy is NULL until a review happens (enqueue never
          // sets it) — TODO confirm against the relay_event_queue schema.
          isNull(relayEventQueue.reviewedBy),
        ),
      )
      .limit(limit);

    if (rows.length === 0) return;

    logger.info("moderation: processing pending events", { count: rows.length });

    // Sequential on purpose: one failing event must not abort the rest.
    for (const { eventId } of rows) {
      try {
        await this.autoReview(eventId);
      } catch (err) {
        logger.error("moderation: poll failed for event", {
          eventId: eventId.slice(0, 8),
          err,
        });
      }
    }
  }
}

export const moderationService = new ModerationService();