#!/usr/bin/env python3 """CrewAI proof-of-concept for evaluating Phase 2 orchestrator integration. Tests CrewAI against a real issue: #358 [ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration. """ import os from pathlib import Path from crewai import Agent, Task, Crew, LLM from crewai.tools import BaseTool # ── Configuration ───────────────────────────────────────────────────── OPENROUTER_API_KEY = os.getenv( "OPENROUTER_API_KEY", os.environ.get("OPENROUTER_API_KEY", ""), ) llm = LLM( model="openrouter/google/gemini-2.0-flash-001", api_key=OPENROUTER_API_KEY, base_url="https://openrouter.ai/api/v1", ) REPO_ROOT = Path(__file__).resolve().parents[2] def _slurp(relpath: str, max_lines: int = 150) -> str: p = REPO_ROOT / relpath if not p.exists(): return f"[FILE NOT FOUND: {relpath}]" lines = p.read_text().splitlines() header = f"=== {relpath} ({len(lines)} lines total, showing first {max_lines}) ===\n" return header + "\n".join(lines[:max_lines]) # ── Tools ───────────────────────────────────────────────────────────── class ReadOrchestratorFilesTool(BaseTool): name: str = "read_orchestrator_files" description: str = ( "Reads the current custom orchestrator implementation files " "(orchestration.py, tasks.py, timmy-orchestrator.sh, coordinator-first-protocol.md) " "and returns their contents for analysis." ) def _run(self) -> str: return "\n\n".join( [ _slurp("orchestration.py"), _slurp("tasks.py", max_lines=120), _slurp("bin/timmy-orchestrator.sh", max_lines=120), _slurp("docs/coordinator-first-protocol.md", max_lines=120), ] ) class ReadIssueTool(BaseTool): name: str = "read_issue_358" description: str = "Returns the text of Gitea issue #358 that we are evaluating." def _run(self) -> str: return ( "Title: [ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration\n" "Body:\n" "Part of Epic: #354\n\n" "Install CrewAI, build a proof-of-concept crew with 2 agents, " "test on a real issue. Evaluate: does it add value over our custom orchestrator? Document findings." ) # ── Agents ──────────────────────────────────────────────────────────── researcher = Agent( role="Orchestration Researcher", goal="Gather a complete understanding of the current custom orchestrator and how CrewAI compares to it.", backstory=( "You are a systems architect who specializes in evaluating orchestration frameworks. " "You read code carefully, extract facts, and avoid speculation. " "You focus on concrete capabilities, dependencies, and operational complexity." ), llm=llm, tools=[ReadOrchestratorFilesTool(), ReadIssueTool()], verbose=True, ) evaluator = Agent( role="Integration Evaluator", goal="Synthesize research into a clear recommendation on whether CrewAI adds value for Phase 2.", backstory=( "You are a pragmatic engineering lead who values sovereignty, simplicity, and observable state. " "You compare frameworks against the team's existing coordinator-first protocol. " "You produce structured recommendations with explicit trade-offs." ), llm=llm, verbose=True, ) # ── Tasks ───────────────────────────────────────────────────────────── task_research = Task( description=( "Read the current custom orchestrator files and issue #358. " "Produce a structured research report covering:\n" "1. Current stack summary (Huey + tasks.py + timmy-orchestrator.sh)\n" "2. Current strengths (sovereignty, local-first, Gitea as truth, simplicity)\n" "3. Current gaps or limitations (if any)\n" "4. What CrewAI offers (agent roles, tasks, crews, tools, memory/RAG)\n" "5. 
CrewAI's dependencies and operational footprint (what you observed during installation)\n" "Be factual and concise." ), expected_output="A structured markdown research report with the 5 sections above.", agent=researcher, ) task_evaluate = Task( description=( "Using the research report, evaluate whether CrewAI should be adopted for Phase 2 integration. " "Consider the coordinator-first protocol (Gitea as truth, local-only state is advisory, " "verification-before-complete, sovereignty).\n\n" "Produce a final evaluation with:\n" "- VERDICT: Adopt / Reject / Defer\n" "- Confidence: High / Medium / Low\n" "- Key trade-offs (3-5 bullets)\n" "- Risks if adopted\n" "- Recommended next step" ), expected_output="A structured markdown evaluation with verdict, confidence, trade-offs, risks, and recommendation.", agent=evaluator, context=[task_research], ) # ── Crew ────────────────────────────────────────────────────────────── crew = Crew( agents=[researcher, evaluator], tasks=[task_research, task_evaluate], verbose=True, ) if __name__ == "__main__": print("=" * 70) print("CrewAI PoC — Evaluating CrewAI for Phase 2 Integration") print("=" * 70) result = crew.kickoff() print("\n" + "=" * 70) print("FINAL OUTPUT") print("=" * 70) print(result.raw)
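
    # Issue #358 also asks us to "document findings". A minimal sketch of
    # persisting the evaluator's final output next to this script; the
    # filename crewai_poc_findings.md is an assumption for illustration,
    # not an existing repo convention.
    findings_path = Path(__file__).with_name("crewai_poc_findings.md")
    findings_path.write_text("# CrewAI PoC Findings\n\n" + result.raw + "\n")
    print(f"\nFindings written to {findings_path}")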