- Install CrewAI v1.13.0 in evaluations/crewai/. - Build a 2-agent proof-of-concept (Researcher + Evaluator). - Test operational execution against issue #358. - Document findings: REJECT for Phase 2 integration. CrewAI's 500+ MB dependency footprint, memory-model drift from Gitea-as-truth, and external API fragility outweigh its agent-role syntax benefits. Recommend evolving the existing Huey stack instead. Closes #358
151 lines
5.9 KiB
Python
#!/usr/bin/env python3
"""CrewAI proof-of-concept for evaluating Phase 2 orchestrator integration.

Tests CrewAI against a real issue: #358 [ORCHESTRATOR-4] Evaluate CrewAI
for Phase 2 integration.
"""
# Standard library
import os
from pathlib import Path

# Third-party: the CrewAI framework under evaluation
from crewai import Agent, Task, Crew, LLM
from crewai.tools import BaseTool
# ── Configuration ─────────────────────────────────────────────────────

# SECURITY: the key must come from the environment only. The previous code
# shipped a hardcoded fallback key, which leaks the credential to anyone
# with repository access — that key must be treated as compromised and
# revoked. An empty-string default keeps the module importable; API calls
# will fail with an auth error until OPENROUTER_API_KEY is exported.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
# LLM endpoint: Gemini 2.0 Flash routed through OpenRouter's
# OpenAI-compatible API.
llm = LLM(
    base_url="https://openrouter.ai/api/v1",
    model="openrouter/google/gemini-2.0-flash-001",
    api_key=OPENROUTER_API_KEY,
)
# Repository root: this file lives at <root>/evaluations/crewai/<name>.py,
# so climb three directory levels from the resolved file path.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
def _slurp(relpath: str, max_lines: int = 150) -> str:
|
|
p = REPO_ROOT / relpath
|
|
if not p.exists():
|
|
return f"[FILE NOT FOUND: {relpath}]"
|
|
lines = p.read_text().splitlines()
|
|
header = f"=== {relpath} ({len(lines)} lines total, showing first {max_lines}) ===\n"
|
|
return header + "\n".join(lines[:max_lines])
|
|
|
|
|
|
# ── Tools ─────────────────────────────────────────────────────────────

class ReadOrchestratorFilesTool(BaseTool):
    """Tool exposing the current custom orchestrator's source files to an agent."""

    name: str = "read_orchestrator_files"
    description: str = (
        "Reads the current custom orchestrator implementation files "
        "(orchestration.py, tasks.py, timmy-orchestrator.sh, coordinator-first-protocol.md) "
        "and returns their contents for analysis."
    )

    def _run(self) -> str:
        # (relative path, line cap) for each orchestrator file to dump;
        # orchestration.py uses _slurp's default cap of 150 lines.
        sources = (
            ("orchestration.py", 150),
            ("tasks.py", 120),
            ("bin/timmy-orchestrator.sh", 120),
            ("docs/coordinator-first-protocol.md", 120),
        )
        return "\n\n".join(_slurp(path, max_lines=cap) for path, cap in sources)
class ReadIssueTool(BaseTool):
    """Tool returning the text of the Gitea issue under evaluation."""

    name: str = "read_issue_358"
    description: str = "Returns the text of Gitea issue #358 that we are evaluating."

    def _run(self) -> str:
        # The issue text is inlined so the PoC has no Gitea API dependency.
        parts = [
            "Title: [ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration",
            "Body:",
            "Part of Epic: #354",
            "",
            "Install CrewAI, build a proof-of-concept crew with 2 agents, "
            "test on a real issue. Evaluate: does it add value over our custom orchestrator? Document findings.",
        ]
        return "\n".join(parts)
# ── Agents ────────────────────────────────────────────────────────────

# Agent 1: gathers facts about the current stack and about CrewAI itself.
researcher = Agent(
    role="Orchestration Researcher",
    goal=(
        "Gather a complete understanding of the current custom orchestrator "
        "and how CrewAI compares to it."
    ),
    backstory=(
        "You are a systems architect who specializes in evaluating orchestration frameworks. "
        "You read code carefully, extract facts, and avoid speculation. "
        "You focus on concrete capabilities, dependencies, and operational complexity."
    ),
    tools=[ReadOrchestratorFilesTool(), ReadIssueTool()],
    llm=llm,
    verbose=True,
)
# Agent 2: turns the research report into a go/no-go recommendation.
evaluator = Agent(
    role="Integration Evaluator",
    goal=(
        "Synthesize research into a clear recommendation on whether CrewAI "
        "adds value for Phase 2."
    ),
    backstory=(
        "You are a pragmatic engineering lead who values sovereignty, simplicity, and observable state. "
        "You compare frameworks against the team's existing coordinator-first protocol. "
        "You produce structured recommendations with explicit trade-offs."
    ),
    verbose=True,
    llm=llm,
)
# ── Tasks ─────────────────────────────────────────────────────────────

# Task 1: a factual research report on the current stack vs. CrewAI.
_RESEARCH_BRIEF = (
    "Read the current custom orchestrator files and issue #358. "
    "Produce a structured research report covering:\n"
    "1. Current stack summary (Huey + tasks.py + timmy-orchestrator.sh)\n"
    "2. Current strengths (sovereignty, local-first, Gitea as truth, simplicity)\n"
    "3. Current gaps or limitations (if any)\n"
    "4. What CrewAI offers (agent roles, tasks, crews, tools, memory/RAG)\n"
    "5. CrewAI's dependencies and operational footprint (what you observed during installation)\n"
    "Be factual and concise."
)

task_research = Task(
    description=_RESEARCH_BRIEF,
    expected_output="A structured markdown research report with the 5 sections above.",
    agent=researcher,
)
# Task 2: final adopt/reject/defer evaluation, fed by the research task.
_EVALUATION_BRIEF = (
    "Using the research report, evaluate whether CrewAI should be adopted for Phase 2 integration. "
    "Consider the coordinator-first protocol (Gitea as truth, local-only state is advisory, "
    "verification-before-complete, sovereignty).\n\n"
    "Produce a final evaluation with:\n"
    "- VERDICT: Adopt / Reject / Defer\n"
    "- Confidence: High / Medium / Low\n"
    "- Key trade-offs (3-5 bullets)\n"
    "- Risks if adopted\n"
    "- Recommended next step"
)

task_evaluate = Task(
    description=_EVALUATION_BRIEF,
    expected_output="A structured markdown evaluation with verdict, confidence, trade-offs, risks, and recommendation.",
    agent=evaluator,
    context=[task_research],  # consumes task_research's output
)
# ── Crew ──────────────────────────────────────────────────────────────

# Sequential two-agent crew: research first, then evaluation.
crew = Crew(
    tasks=[task_research, task_evaluate],
    agents=[researcher, evaluator],
    verbose=True,
)
if __name__ == "__main__":
    RULE = "=" * 70

    # Run the two-task crew and print the evaluator's final report.
    print(RULE)
    print("CrewAI PoC — Evaluating CrewAI for Phase 2 Integration")
    print(RULE)
    result = crew.kickoff()
    print("\n" + RULE)
    print("FINAL OUTPUT")
    print(RULE)
    print(result.raw)