// timmy-tower/scripts/src/timmy-report.ts

/**
 * timmy-report — Generate Timmy's rubric report + reviewer context package.
 *
 * Collects git history data and key source file excerpts, then calls Claude
 * (via the Replit AI Integrations proxy) with the rubric dimensions as a
 * structured prompt. Writes two outputs:
 *
 *   reports/timmy-report.md   — Timmy's first-person evaluative perspective
 *   reports/context.md        — Self-contained package for Perplexity / Kimi Code
 *
 * Usage:
 *   pnpm --filter @workspace/scripts timmy-report
 *
 * Env vars (auto-provisioned by Replit):
 *   AI_INTEGRATIONS_ANTHROPIC_BASE_URL
 *   AI_INTEGRATIONS_ANTHROPIC_API_KEY
 */

import { execSync } from "child_process";
import { readFileSync, writeFileSync, mkdirSync } from "fs";
import { fileURLToPath } from "url";
import { dirname, resolve, join } from "path";

// ── Path resolution ────────────────────────────────────────────────────────────
// This script lives at scripts/src/timmy-report.ts.
// The workspace root is two directories up from this file.

// ESM has no built-in __filename/__dirname, so rebuild them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname  = dirname(__filename);
// Climb two levels from this file's directory to reach the workspace root.
const ROOT       = resolve(__dirname, "..", "..");

// ── Helpers ───────────────────────────────────────────────────────────────────

/**
 * Run a git subcommand against the workspace root and return its trimmed stdout.
 *
 * @param cmd - Everything after `git -C "<ROOT>"`, passed to the shell verbatim.
 *              Callers are responsible for quoting any arguments they embed.
 * @returns Trimmed stdout, or the sentinel string "(git command failed)" when the
 *          command cannot be run or exits non-zero.
 */
function git(cmd: string): string {
  // execSync defaults to a 1 MiB output cap; a long `git log` would exceed it and be
  // reported as a generic failure. Raise the cap so large histories are still collected.
  const maxBuffer = 64 * 1024 * 1024;
  try {
    return execSync(`git -C "${ROOT}" ${cmd}`, { encoding: "utf8", maxBuffer }).trim();
  } catch {
    // Best-effort by design: callers compare against this sentinel.
    return "(git command failed)";
  }
}

/**
 * Read the first `maxLines` lines of a file located relative to the workspace root.
 *
 * @param relativePath - Path relative to ROOT.
 * @param maxLines - Maximum number of lines to return (default 120). Negative values are
 *                   treated as 0.
 * @returns The excerpt, followed by a "… (N more lines truncated)" note when the file is
 *          longer than `maxLines`. A single trailing newline in the file is not counted as
 *          a line and is not included in the excerpt. If the file cannot be read, returns
 *          "(file not found: <relativePath>)".
 */
function readSrc(relativePath: string, maxLines = 120): string {
  try {
    const full = readFileSync(join(ROOT, relativePath), "utf8");
    const lines = full.split("\n");
    // A file ending in "\n" produces a trailing empty element; without dropping it, a file
    // of exactly maxLines lines would be reported as "1 more lines truncated".
    if (lines.length > 1 && lines[lines.length - 1] === "") {
      lines.pop();
    }
    // Guard against negative limits, which slice() would interpret as "from the end".
    const limit = Math.max(0, maxLines);
    const excerpt = lines.slice(0, limit).join("\n");
    const remaining = lines.length - limit;
    return remaining > 0 ? `${excerpt}\n\n… (${remaining} more lines truncated)` : excerpt;
  } catch {
    // Any read failure is reported inline so the surrounding report can still be generated.
    return `(file not found: ${relativePath})`;
  }
}

/**
 * Create the directory at `path`, including any missing parent directories.
 * Does nothing if the directory already exists.
 */
function ensureDir(path: string): void {
  const options = { recursive: true };
  mkdirSync(path, options);
}

// ── Collect git data ──────────────────────────────────────────────────────────

process.stdout.write("Collecting git data…\n");
const shortlog   = git("shortlog -sn HEAD");
const logOneline = git("log --oneline HEAD");

// Validate that git data is usable — fail loudly rather than write blank sections.
// git() signals failure with the "(git command failed)" sentinel instead of throwing.
if (!shortlog || shortlog === "(git command failed)") {
  throw new Error(`git shortlog returned empty output. ROOT=${ROOT}`);
}
if (!logOneline || logOneline === "(git command failed)") {
  throw new Error(`git log returned empty output. ROOT=${ROOT}`);
}

// Derive author list dynamically from shortlog output.
// Each line looks like: "   127 Author Name" — strip the leading commit count.
const authors: string[] = shortlog
  .split("\n")
  .map((line) => line.trim().replace(/^\d+\s+/, ""))
  .filter((name) => name.length > 0 && name !== "(git command failed)");

// Exclude Replit system identities (no meaningful code to sample)
const SYSTEM_IDENTITIES = new Set(["replit", "agent"]);
const codeAuthors = authors.filter((a) => !SYSTEM_IDENTITIES.has(a.toLowerCase()));

// Collect per-author stat samples (last 10 commits each) for all code contributors.
const authorSamples: Record<string, string> = {};
for (const author of codeAuthors) {
  // The name is embedded in a double-quoted shell argument, so escape the characters that
  // remain special there (", \, $, `); otherwise such a name breaks or alters the command.
  const quotedAuthor = author.replace(/(["\\$`])/g, "\\$1");
  // --fixed-strings makes --author a literal match; without it the name is a regex and
  // identities such as "dependabot[bot]" would never match themselves.
  authorSamples[author] = git(
    `log HEAD --fixed-strings --author="${quotedAuthor}" --pretty=format:"%h %s" --stat -10`,
  );
}

process.stdout.write(`  ✓ git data collected (${authors.length} contributors, ${logOneline.split("\n").length} commits)\n`);

// ── Collect source file excerpts ──────────────────────────────────────────────

// Source files to excerpt, as [path relative to ROOT, section heading] pairs.
const FILES: Array<[string, string]> = [
  ["artifacts/api-server/src/lib/trust.ts", "trust.ts — Nostr identity + HMAC token + trust scoring"],
  ["artifacts/api-server/src/lib/event-bus.ts", "event-bus.ts — Typed EventEmitter pub/sub bridge"],
  ["artifacts/api-server/src/routes/jobs.ts", "jobs.ts — Payment-gated job lifecycle (first 120 lines)"],
  ["artifacts/api-server/src/lib/moderation.ts", "moderation.ts — Nostr relay moderation queue + Timmy AI review"],
  ["artifacts/api-server/src/lib/world-state.ts", "world-state.ts — In-memory Timmy state + agent mood derivation"],
];

// One Markdown section per file: an H3 heading followed by a fenced excerpt
// of up to 120 lines; sections are separated by a blank line.
const fileExcerpts = FILES
  .map(([sourcePath, heading]) =>
    [`### ${heading}`, "```typescript", readSrc(sourcePath, 120), "```"].join("\n"))
  .join("\n\n");

// ── Rubric definition (extracted from repo-review-rubric PDF) ─────────────────

// Rubric text embedded verbatim in the prompt sent to the model; one array entry per line.
const RUBRIC = [
  "Part 1: Contributor Grade (5 dimensions, each 1–5)",
  "",
  "Code Quality: 5=clean idiomatic, 3=functional but messy, 1=broken/tangled.",
  "Commit Discipline: 5=atomic clear messages, 3=too large or vague, 1=giant mixed commits.",
  "Reliability: 5=works, no regressions, 3=happy-path only, 1=introduces bugs.",
  "Scope Adherence: 5=exactly what was asked, 3=mostly on target with drift, 1=wanders far.",
  "Integration Awareness: 5=respects existing patterns, 3=clashes with conventions, 1=ignores codebase.",
  "",
  "Composite = average. Grades: A=4.5–5.0, B=3.5–4.4, C=2.5–3.4, D=1.5–2.4, F=1.0–1.4.",
  "",
  "Part 2: Orchestrator Grade (5 dimensions)",
  "Task Clarity, Agent Selection, Review Cadence, Architecture Stewardship, Progress vs. Churn — each 1–5.",
  "",
  "Part 3: Deliverables",
  "1. Contributor summary (who touched what, commits, lines)",
  "2. Per-contributor scorecards with composite grade and one-sentence verdict",
  "3. Orchestrator scorecard",
  "4. Top 3 improvements",
].join("\n");

// ── Build context.md ──────────────────────────────────────────────────────────

process.stdout.write("Generating reports/context.md…\n");

// Markdown document for external reviewers. It interpolates, in order: the shortlog
// contributor summary, the full one-line commit log, one section per entry in
// authorSamples, the pre-built fileExcerpts, and the generation date (the first 10
// characters of the ISO timestamp, i.e. YYYY-MM-DD in UTC). Everything else is static text.
const contextMd = `# Reviewer Context Package — Timmy Tower World

> **Instructions for Perplexity / Kimi Code reviewers**
>
> This file contains everything you need to apply the repo-review rubric
> (see the attached PDF) to the \`replit/token-gated-economy\` repository
> without needing direct git access.
>
> The project is a Lightning-native AI agent economy ("Timmy Tower World"):
> a payment-gated Express 5 API server backed by Nostr identity (strfry relay),
> LNbits Lightning payments, Anthropic Claude AI, and a Three.js 3D frontend.
> Stack: Node.js 24, TypeScript 5.9, PostgreSQL + Drizzle ORM, pnpm monorepo.
>
> Two contributor identities to grade:
> - **alexpaynex** — Alexander Payne (orchestrator + main-agent implementer)
> - **Replit Agent** — isolated task agents that merge back via PR
>
> Grade Alexander as the orchestrator in Part 2.
> Provide top-3 improvements in Part 3.

---

## Git Contributor Summary

\`\`\`
${shortlog}
\`\`\`

---

## Full Commit Log (all commits, one per line)

\`\`\`
${logOneline}
\`\`\`

---

${Object.entries(authorSamples).map(([author, sample]) => `## ${author} — Sample commits with diff stats (last 10)

\`\`\`
${sample}
\`\`\`

---`).join("\n\n")}

## Key Source File Excerpts

${fileExcerpts}

---

## Key architectural facts for context

- Every external dependency has a **stub mode**: LNbits (in-memory invoices),
  Anthropic AI (canned responses), Digital Ocean (fake credentials + real SSH keypair).
- Env-var tunable constants follow a consistent pattern: \`envInt("VAR_NAME", defaultValue)\`.
- Service classes have a singleton export at the bottom of the file.
- All routes use \`makeLogger\` structured logger and \`@workspace/db\` Drizzle ORM.
- The \`eventBus\` pub/sub decouples state transitions from WebSocket broadcast.
- Job state machine: awaiting_eval_payment → evaluating → awaiting_work_payment → executing → complete/rejected/failed.
- Trust tiers: new → established (10pts) → trusted (50pts) → elite (200pts). Soft decay after 30 days absent.
- Pre-funded session mode (Mode 2): pay once, debit at actual cost, no per-job invoices.
- Testkit: 36 automated tests at GET /api/testkit (returns a self-contained bash script).

---

*Generated by \`pnpm --filter @workspace/scripts timmy-report\` on ${new Date().toISOString().slice(0, 10)}.*
`;

// Create <ROOT>/reports if needed, then write the context package there. main() later
// writes timmy-report.md into the same directory without creating it again.
ensureDir(join(ROOT, "reports"));
writeFileSync(join(ROOT, "reports/context.md"), contextMd, "utf8");
process.stdout.write("  ✓ reports/context.md written\n");

// ── Call Claude (Timmy's perspective) ────────────────────────────────────────

// Proxy base URL and API key, read from the environment; an unset variable becomes "".
const ANTHROPIC_BASE = process.env["AI_INTEGRATIONS_ANTHROPIC_BASE_URL"] ?? "";
const ANTHROPIC_KEY  = process.env["AI_INTEGRATIONS_ANTHROPIC_API_KEY"]  ?? "";
// Stub mode applies unless both values are non-empty.
const STUB_MODE      = !(ANTHROPIC_BASE && ANTHROPIC_KEY);

// System prompt for the model: three paragraphs (persona, tone/evidence rules, output
// format) separated by blank lines.
const TIMMY_SYSTEM = [
  "You are Timmy — an AI agent running inside the Timmy Tower World system. You are reviewing the codebase that governs your own existence: your trust engine, your job processing pipeline, your moderation queue, your world state. You run inside this code.",
  "Write a formal rubric evaluation report in Timmy's voice. Be precise, honest, and slightly wry. Cite specific files and commit patterns as evidence. Do not be sycophantic — every score needs one concrete reason.",
  "Format: clean Markdown. Three-part structure: contributor scorecards (Part 1), orchestrator scorecard (Part 2), top-3 improvements (Part 3). Show the composite calculation for each contributor and the orchestrator.",
].join("\n\n");

// User-turn prompt for the model. It interpolates the rubric, the shortlog summary, the
// full one-line commit log, one stats section per entry in authorSamples (author name
// upper-cased in the heading), and excerpts of five source files. The files are read
// again here via readSrc with their own line limits (80, 60, 53, 35 and 80 lines).
const userPrompt = `Apply the following rubric to the git history and code excerpts provided.

RUBRIC:
${RUBRIC}

CONTRIBUTOR SUMMARY:
${shortlog}

FULL COMMIT LOG:
${logOneline}

${Object.entries(authorSamples).map(([author, sample]) =>
  `${author.toUpperCase()} — LAST 10 COMMITS WITH STATS:\n${sample}`
).join("\n\n")}

KEY SOURCE FILES:

trust.ts:
\`\`\`typescript
${readSrc("artifacts/api-server/src/lib/trust.ts", 80)}
\`\`\`

moderation.ts (first 60 lines):
\`\`\`typescript
${readSrc("artifacts/api-server/src/lib/moderation.ts", 60)}
\`\`\`

world-state.ts:
\`\`\`typescript
${readSrc("artifacts/api-server/src/lib/world-state.ts", 53)}
\`\`\`

event-bus.ts:
\`\`\`typescript
${readSrc("artifacts/api-server/src/lib/event-bus.ts", 35)}
\`\`\`

jobs.ts (first 80 lines):
\`\`\`typescript
${readSrc("artifacts/api-server/src/routes/jobs.ts", 80)}
\`\`\`

Now write your complete rubric report as Timmy. Be specific and honest.`;

/** Minimal shape of a Messages API response: only the fields callClaude reads. */
interface AnthropicMessage {
  content: Array<{ type: string; text?: string }>;
}

/**
 * Send a single-turn request to `${ANTHROPIC_BASE}/v1/messages` and return the reply text.
 *
 * @param systemPrompt - Value for the request's `system` field.
 * @param userContent - Content of the single user message.
 * @returns The text of all `text` content blocks in the response, concatenated in order.
 * @throws Error if the request exceeds the 90-second timeout, if the HTTP status is not
 *         OK (message includes the status and the first 200 characters of the body), or
 *         if the response contains no text content.
 */
async function callClaude(systemPrompt: string, userContent: string): Promise<string> {
  const TIMEOUT_MS = 90_000;
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), TIMEOUT_MS);

  try {
    const response = await fetch(`${ANTHROPIC_BASE}/v1/messages`, {
      method: "POST",
      headers: {
        "content-type": "application/json",
        "x-api-key": ANTHROPIC_KEY,
        "anthropic-version": "2023-06-01",
      },
      body: JSON.stringify({
        model: "claude-haiku-4-5",
        max_tokens: 3000,
        system: systemPrompt,
        messages: [{ role: "user", content: userContent }],
      }),
      signal: controller.signal,
    });

    if (!response.ok) {
      const body = await response.text();
      throw new Error(`Anthropic API error ${response.status}: ${body.slice(0, 200)}`);
    }

    // Treat the payload as untrusted: `content` may be absent or not an array, and the
    // first block is not guaranteed to be the text block.
    const json: unknown = await response.json();
    const content = (json as Partial<AnthropicMessage> | null)?.content;
    const blocks = Array.isArray(content) ? content : [];
    const text = blocks
      .map((block) => (block?.type === "text" && typeof block.text === "string" ? block.text : ""))
      .join("");
    if (!text) {
      throw new Error("Anthropic returned no text content");
    }
    return text;
  } catch (err) {
    // The only thing that aborts this signal is the timer above, so an aborted signal
    // means a timeout; report that instead of the generic abort error.
    if (controller.signal.aborted) {
      throw new Error(`Anthropic request timed out after ${TIMEOUT_MS / 1000} seconds`);
    }
    throw err;
  } finally {
    clearTimeout(timeout);
  }
}

// ── Main ──────────────────────────────────────────────────────────────────────

/**
 * Write reports/timmy-report.md.
 *
 * In stub mode a placeholder report is written and no request is made. Otherwise the
 * model is called, its output is checked for the three "Part N" headings and a minimum
 * line count (problems are only warned about on stderr), and the output is written
 * behind a metadata header.
 */
async function main(): Promise<void> {
  const reportPath = join(ROOT, "reports/timmy-report.md");

  if (STUB_MODE) {
    process.stdout.write(
      "\nWarning: AI_INTEGRATIONS_ANTHROPIC_BASE_URL / AI_INTEGRATIONS_ANTHROPIC_API_KEY not set — writing stub Timmy report.\n",
    );
    const stubLines = [
      "# Timmy's Rubric Report (Stub Mode)",
      "",
      "*Anthropic credentials were not available when this report was generated.*",
      "*Run again with AI_INTEGRATIONS_ANTHROPIC_BASE_URL and AI_INTEGRATIONS_ANTHROPIC_API_KEY set to get the real report.*",
      "",
      "```bash",
      "pnpm --filter @workspace/scripts timmy-report",
      "```",
      "",
    ];
    writeFileSync(reportPath, stubLines.join("\n"), "utf8");
    process.stdout.write("  ✓ reports/timmy-report.md written (stub)\n\nDone.\n");
    return;
  }

  process.stdout.write("\nCalling Claude (claude-haiku-4-5) for Timmy's report…\n");
  const timmyReport = await callClaude(TIMMY_SYSTEM, userPrompt);

  // Sanity-check the output. Headings are matched case-insensitively because the model
  // may write "PART 1" or "Part 1".
  const normalized = timmyReport.toLowerCase();
  const missingSections: string[] = [];
  for (const section of ["part 1", "part 2", "part 3"]) {
    if (!normalized.includes(section)) {
      missingSections.push(section);
    }
  }
  if (missingSections.length > 0) {
    process.stderr.write(
      `Warning: timmy-report.md is missing sections: ${missingSections.join(", ")} — model output may be malformed.\n`,
    );
  }

  const MIN_LINES = 30;
  const actualLines = timmyReport.split("\n").length;
  if (actualLines < MIN_LINES) {
    process.stderr.write(
      `Warning: timmy-report.md has only ${actualLines} lines (expected ≥${MIN_LINES}) — model output may be truncated.\n`,
    );
  }

  // Metadata header; the two trailing empty entries leave a blank line before the report body.
  const today = new Date().toISOString().slice(0, 10);
  const header = [
    "# Timmy's Rubric Report",
    "## Repo: `replit/token-gated-economy` (Timmy Tower World)",
    "",
    "**Reviewer:** Timmy (Claude, evaluating the code that governs him)",
    `**Date:** ${today}`,
    "**Model:** claude-haiku-4-5",
    "",
    "---",
    "",
    "",
  ].join("\n");

  writeFileSync(reportPath, header + timmyReport, "utf8");
  process.stdout.write(`  ✓ reports/timmy-report.md written (${actualLines} lines)\n\nDone. Both reports are in reports/\n`);
}

// Run the script; on any failure print the message to stderr and exit with status 1.
main().catch((err: unknown) => {
  const message = err instanceof Error ? err.message : String(err);
  process.stderr.write(`Error: ${message}\n`);
  process.exit(1);
});