Add streaming capabilities and improve API stability and security

Introduce streaming for AI job execution, implement rate limiting for API endpoints, enhance CORS configuration, and refactor event handling.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 418bf6f8-212b-4bb0-a7a5-8231a061da4e
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 2967540c-7b01-4168-87be-fde774e32494
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/9f85e954-647c-46a5-90a7-396e495a805a/418bf6f8-212b-4bb0-a7a5-8231a061da4e/Q83Uqvu
Replit-Helium-Checkpoint-Created: true
This commit is contained in:
alexpaynex
2026-03-18 22:17:10 +00:00
parent ca94c0a9e5
commit 5b3d7edf6a
9 changed files with 388 additions and 15 deletions

View File

@@ -79,6 +79,48 @@ Fulfill it thoroughly and helpfully. Be concise yet complete.`,
outputTokens: message.usage.output_tokens,
};
}
/**
 * Streaming variant of executeWork (#3). Calls onChunk for every text delta
 * so callers can pipe tokens to an SSE stream in real time.
 *
 * @param requestText The user's paid request, forwarded verbatim as the prompt.
 * @param onChunk     Invoked once per text delta as tokens arrive.
 * @returns The fully assembled response text plus input/output token counts.
 */
async executeWorkStreaming(
  requestText: string,
  onChunk: (delta: string) => void,
): Promise<WorkResult> {
  let assembled = "";
  let promptTokens = 0;
  let completionTokens = 0;

  const stream = anthropic.messages.stream({
    model: this.workModel,
    max_tokens: 8192,
    system: `You are Timmy, a capable AI agent. A user has paid for you to handle their request.
Fulfill it thoroughly and helpfully. Be concise yet complete.`,
    messages: [{ role: "user", content: requestText }],
  });

  for await (const streamEvent of stream) {
    switch (streamEvent.type) {
      case "content_block_delta":
        if (streamEvent.delta.type === "text_delta") {
          const piece = streamEvent.delta.text;
          assembled += piece;
          onChunk(piece);
        }
        break;
      case "message_delta":
        // message_delta carries the running output-token usage.
        if (streamEvent.usage) {
          completionTokens = streamEvent.usage.output_tokens;
        }
        break;
      case "message_start":
        // Input tokens are known up front, on the opening event.
        if (streamEvent.message.usage) {
          promptTokens = streamEvent.message.usage.input_tokens;
        }
        break;
    }
  }

  return {
    result: assembled,
    inputTokens: promptTokens,
    outputTokens: completionTokens,
  };
}
}
// Shared singleton — import this rather than constructing AgentService directly.
export const agentService = new AgentService();

View File

@@ -0,0 +1,33 @@
import { EventEmitter } from "events";
/** Events describing the lifecycle of a single paid job. */
export type JobEvent =
  | { type: "job:state"; jobId: string; state: string }
  | { type: "job:paid"; jobId: string; invoiceType: "eval" | "work" }
  | { type: "job:completed"; jobId: string; result: string }
  | { type: "job:failed"; jobId: string; reason: string };

/** Events describing the lifecycle of a billing session. */
export type SessionEvent =
  | { type: "session:state"; sessionId: string; state: string }
  | { type: "session:paid"; sessionId: string; amountSats: number }
  | { type: "session:balance"; sessionId: string; balanceSats: number };

export type BusEvent = JobEvent | SessionEvent;

/**
 * Typed in-process pub/sub bus. Every event is published on the single
 * "bus" channel; subscribers filter by the discriminant `type` field.
 */
class EventBus extends EventEmitter {
  emit(event: "bus", data: BusEvent): boolean;
  emit(event: string, ...args: unknown[]): boolean {
    return super.emit(event, ...args);
  }

  on(event: "bus", listener: (data: BusEvent) => void): this;
  on(event: string, listener: (...args: unknown[]) => void): this {
    return super.on(event, listener);
  }

  /** Broadcast a single event to every current subscriber. */
  publish(data: BusEvent): void {
    this.emit("bus", data);
  }

  /**
   * Subscribe to all bus events and receive a disposer. Long-lived callers
   * (e.g. SSE handlers) should invoke the returned function on disconnect
   * so listeners are not leaked.
   */
  subscribe(listener: (data: BusEvent) => void): () => void {
    this.on("bus", listener);
    return () => {
      this.removeListener("bus", listener);
    };
  }
}

export const eventBus = new EventBus();
// Many concurrent SSE clients may subscribe at once; raise the default
// cap of 10 so Node does not emit MaxListenersExceededWarning.
eventBus.setMaxListeners(256);

View File

@@ -0,0 +1,33 @@
import { rateLimit, type Options } from "express-rate-limit";
/**
 * Read an integer from the environment, falling back when the variable is
 * unset, empty, or malformed.
 *
 * Uses Number(...) instead of parseInt so values with trailing garbage
 * ("30abc") or fractional parts ("30.5") are rejected rather than silently
 * truncated — a misconfigured limit falls back to the safe default.
 */
function envInt(key: string, fallback: number): number {
  const raw = process.env[key];
  if (raw === undefined || raw.trim() === "") return fallback;
  const parsed = Number(raw);
  return Number.isInteger(parsed) ? parsed : fallback;
}
/**
 * Build an express-rate-limit middleware for a fixed window.
 *
 * @param windowMs    Length of the rate window, in milliseconds.
 * @param max         Default max requests per window per IP.
 * @param overrideKey Optional env var name whose integer value overrides max.
 */
function limiter(windowMs: number, max: number, overrideKey?: string) {
  const cap = overrideKey ? envInt(overrideKey, max) : max;
  // Window length is fixed per limiter, so the retry hint is constant.
  const retryAfterSeconds = Math.ceil(windowMs / 1000);
  const options = {
    windowMs,
    max: cap,
    standardHeaders: "draft-7",
    legacyHeaders: false,
    handler: (_req, res) => {
      res.status(429).json({
        error: "rate_limited",
        message: "Too many requests — please slow down.",
        retryAfterSeconds,
      });
    },
  } satisfies Partial<Options>;
  return rateLimit(options);
}
// Per-IP fixed-window limits for the public API. Each default can be
// overridden at deploy time via the named env var (integer requests/window).
// POST /api/jobs — 30 req/min per IP (configurable via RATE_LIMIT_JOBS)
export const jobsLimiter = limiter(60_000, 30, "RATE_LIMIT_JOBS");
// POST /api/sessions — 10 req/min per IP (configurable via RATE_LIMIT_SESSIONS)
export const sessionsLimiter = limiter(60_000, 10, "RATE_LIMIT_SESSIONS");
// POST /api/bootstrap — 3 req/hour per IP (configurable via RATE_LIMIT_BOOTSTRAP)
export const bootstrapLimiter = limiter(60 * 60_000, 3, "RATE_LIMIT_BOOTSTRAP");

View File

@@ -0,0 +1,55 @@
import { PassThrough } from "stream";
/** Bookkeeping for one live job stream. */
interface StreamEntry {
  stream: PassThrough;
  createdAt: number;
}

/**
 * In-memory registry mapping job ids to PassThrough streams so the worker
 * can push chunks that an SSE endpoint later pipes to the client.
 *
 * Entries are dropped when their stream closes, when end() is called, or
 * lazily (entries older than TTL_MS) during register().
 */
class StreamRegistry {
  private readonly streams = new Map<string, StreamEntry>();
  private readonly TTL_MS = 5 * 60 * 1000;

  /**
   * Create (or replace) the stream for a job. Any existing stream for the
   * same id is destroyed first so the old consumer disconnects.
   */
  register(jobId: string): PassThrough {
    const existing = this.streams.get(jobId);
    if (existing) {
      existing.stream.destroy();
    }
    const stream = new PassThrough();
    this.streams.set(jobId, { stream, createdAt: Date.now() });
    stream.on("close", () => {
      // BUGFIX: destroy() on a replaced stream fires "close" asynchronously,
      // after the map already points at the new stream — deleting
      // unconditionally here would evict the fresh entry. Only delete when
      // the map still references this exact stream.
      if (this.streams.get(jobId)?.stream === stream) {
        this.streams.delete(jobId);
      }
    });
    this.evictExpired();
    return stream;
  }

  /** The live stream for a job, or null when none is registered. */
  get(jobId: string): PassThrough | null {
    return this.streams.get(jobId)?.stream ?? null;
  }

  /** Push a chunk to a job's stream; a no-op when none is registered. */
  write(jobId: string, chunk: string): void {
    this.streams.get(jobId)?.stream.write(chunk);
  }

  /** Finish a job's stream (flushing buffered data) and forget it. */
  end(jobId: string): void {
    const entry = this.streams.get(jobId);
    if (entry) {
      entry.stream.end();
      this.streams.delete(jobId);
    }
  }

  /** Destroy and drop entries older than TTL_MS (abandoned consumers). */
  private evictExpired(): void {
    const now = Date.now();
    for (const [id, entry] of this.streams.entries()) {
      if (now - entry.createdAt > this.TTL_MS) {
        entry.stream.destroy();
        this.streams.delete(id);
      }
    }
  }
}

export const streamRegistry = new StreamRegistry();