Some checks failed
Architecture Lint / Linter Tests (pull_request) Successful in 10s
PR Checklist / pr-checklist (pull_request) Failing after 1m25s
Smoke Test / smoke (pull_request) Failing after 8s
Validate Config / YAML Lint (pull_request) Failing after 7s
Validate Config / JSON Validate (pull_request) Successful in 7s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 8s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 16s
Validate Config / Cron Syntax Check (pull_request) Successful in 6s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 6s
Validate Config / Playbook Schema Validation (pull_request) Successful in 9s
Architecture Lint / Lint Repository (pull_request) Failing after 9s
1. bin/deadman-fallback.py: stripped corrupted line-number prefixes and fixed unterminated string literal 2. fleet/resource_tracker.py: fixed f-string set comprehension (needs parens in Python 3.12) 3. ansible deadman_switch: extracted handlers to handlers/main.yml 4. evaluations/crewai/poc_crew.py: removed hardcoded API key 5. playbooks/fleet-guardrails.yaml: added trailing newline 6. matrix/docker-compose.yml: stripped trailing whitespace 7. smoke.yml: excluded security-detection scripts from secret scan
151 lines
5.8 KiB
Python
151 lines
5.8 KiB
Python
#!/usr/bin/env python3
"""CrewAI proof-of-concept for evaluating Phase 2 orchestrator integration.

Tests CrewAI against a real issue: #358 [ORCHESTRATOR-4] Evaluate CrewAI
for Phase 2 integration.
"""
import os
|
|
from pathlib import Path
|
|
from crewai import Agent, Task, Crew, LLM
|
|
from crewai.tools import BaseTool
|
|
|
|
# ── Configuration ─────────────────────────────────────────────────────
|
|
|
|
# API key for OpenRouter, read once at import time. The original passed
# os.environ.get("OPENROUTER_API_KEY", "") as the default to os.getenv for
# the SAME variable — a redundant second lookup. One call is equivalent.
# An empty string is passed through to LLM() and fails at request time,
# which is acceptable for a PoC script.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# Shared model handle: Gemini 2.0 Flash served through OpenRouter's
# OpenAI-compatible endpoint. Both agents below reuse this instance.
llm = LLM(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
    model="openrouter/google/gemini-2.0-flash-001",
)

# Repository root: this file lives at evaluations/crewai/poc_crew.py, so
# walk three levels up from the resolved file path.
REPO_ROOT = Path(__file__).resolve().parent.parent.parent

def _slurp(relpath: str, max_lines: int = 150) -> str:
    """Return up to *max_lines* lines of a repo file with a descriptive header.

    Args:
        relpath: Path relative to REPO_ROOT.
        max_lines: Maximum number of lines to include in the output.

    Returns:
        A header line followed by the (possibly truncated) file content, or a
        "[FILE NOT FOUND: ...]" placeholder when the file does not exist.
    """
    p = REPO_ROOT / relpath
    if not p.exists():
        return f"[FILE NOT FOUND: {relpath}]"
    # Explicit encoding: bare read_text() uses the locale's preferred encoding
    # and can raise UnicodeDecodeError on UTF-8 repos; errors="replace" keeps
    # a best-effort read from crashing the whole crew run on one bad byte.
    lines = p.read_text(encoding="utf-8", errors="replace").splitlines()
    # Report the number of lines actually shown, not the cap, so the header
    # is truthful for files shorter than max_lines.
    shown = min(len(lines), max_lines)
    header = f"=== {relpath} ({len(lines)} lines total, showing first {shown}) ===\n"
    return header + "\n".join(lines[:max_lines])

# ── Tools ─────────────────────────────────────────────────────────────
|
|
|
|
class ReadOrchestratorFilesTool(BaseTool):
    """Tool exposing the custom orchestrator's source files to an agent."""

    name: str = "read_orchestrator_files"
    description: str = (
        "Reads the current custom orchestrator implementation files "
        "(orchestration.py, tasks.py, timmy-orchestrator.sh, coordinator-first-protocol.md) "
        "and returns their contents for analysis."
    )

    def _run(self) -> str:
        # (relpath, line cap) for each orchestrator artifact we expose.
        sources = (
            ("orchestration.py", 150),
            ("tasks.py", 120),
            ("bin/timmy-orchestrator.sh", 120),
            ("docs/coordinator-first-protocol.md", 120),
        )
        return "\n\n".join(_slurp(path, max_lines=cap) for path, cap in sources)

class ReadIssueTool(BaseTool):
    """Tool returning the (inlined) text of the Gitea issue under evaluation."""

    name: str = "read_issue_358"
    description: str = "Returns the text of Gitea issue #358 that we are evaluating."

    def _run(self) -> str:
        issue_text = (
            "Title: [ORCHESTRATOR-4] Evaluate CrewAI for Phase 2 integration\n"
            "Body:\n"
            "Part of Epic: #354\n\n"
            "Install CrewAI, build a proof-of-concept crew with 2 agents, "
            "test on a real issue. Evaluate: does it add value over our custom orchestrator? Document findings."
        )
        return issue_text

# ── Agents ────────────────────────────────────────────────────────────
|
|
|
|
# Agent 1: gathers facts about the existing orchestrator and about CrewAI.
_researcher_backstory = (
    "You are a systems architect who specializes in evaluating orchestration frameworks. "
    "You read code carefully, extract facts, and avoid speculation. "
    "You focus on concrete capabilities, dependencies, and operational complexity."
)

researcher = Agent(
    role="Orchestration Researcher",
    goal="Gather a complete understanding of the current custom orchestrator and how CrewAI compares to it.",
    backstory=_researcher_backstory,
    tools=[ReadOrchestratorFilesTool(), ReadIssueTool()],
    llm=llm,
    verbose=True,
)

# Agent 2: turns the research into an adopt/reject/defer recommendation.
# Note: no tools — it works purely from the research task's output.
_evaluator_backstory = (
    "You are a pragmatic engineering lead who values sovereignty, simplicity, and observable state. "
    "You compare frameworks against the team's existing coordinator-first protocol. "
    "You produce structured recommendations with explicit trade-offs."
)

evaluator = Agent(
    role="Integration Evaluator",
    goal="Synthesize research into a clear recommendation on whether CrewAI adds value for Phase 2.",
    backstory=_evaluator_backstory,
    llm=llm,
    verbose=True,
)

# ── Tasks ─────────────────────────────────────────────────────────────
|
|
|
|
# Task 1: produce a factual research report on the custom stack vs CrewAI.
_research_brief = (
    "Read the current custom orchestrator files and issue #358. "
    "Produce a structured research report covering:\n"
    "1. Current stack summary (Huey + tasks.py + timmy-orchestrator.sh)\n"
    "2. Current strengths (sovereignty, local-first, Gitea as truth, simplicity)\n"
    "3. Current gaps or limitations (if any)\n"
    "4. What CrewAI offers (agent roles, tasks, crews, tools, memory/RAG)\n"
    "5. CrewAI's dependencies and operational footprint (what you observed during installation)\n"
    "Be factual and concise."
)

task_research = Task(
    description=_research_brief,
    expected_output="A structured markdown research report with the 5 sections above.",
    agent=researcher,
)

# Task 2: final verdict; consumes the research report via context=.
_evaluation_brief = (
    "Using the research report, evaluate whether CrewAI should be adopted for Phase 2 integration. "
    "Consider the coordinator-first protocol (Gitea as truth, local-only state is advisory, "
    "verification-before-complete, sovereignty).\n\n"
    "Produce a final evaluation with:\n"
    "- VERDICT: Adopt / Reject / Defer\n"
    "- Confidence: High / Medium / Low\n"
    "- Key trade-offs (3-5 bullets)\n"
    "- Risks if adopted\n"
    "- Recommended next step"
)

task_evaluate = Task(
    description=_evaluation_brief,
    expected_output="A structured markdown evaluation with verdict, confidence, trade-offs, risks, and recommendation.",
    agent=evaluator,
    context=[task_research],
)

# ── Crew ──────────────────────────────────────────────────────────────
|
|
|
|
# Two-agent crew; the evaluation task depends on the research task's output
# through its context, so they run in order.
_crew_agents = [researcher, evaluator]
_crew_tasks = [task_research, task_evaluate]

crew = Crew(agents=_crew_agents, tasks=_crew_tasks, verbose=True)

if __name__ == "__main__":
|
|
print("=" * 70)
|
|
print("CrewAI PoC — Evaluating CrewAI for Phase 2 Integration")
|
|
print("=" * 70)
|
|
result = crew.kickoff()
|
|
print("\n" + "=" * 70)
|
|
print("FINAL OUTPUT")
|
|
print("=" * 70)
|
|
print(result.raw)
|