Files
hermes-agent/scripts/forge.py
Ezra ab7fd52ae3
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Docs Site Checks / docs-site-checks (pull_request) Failing after 4s
Nix / nix (ubuntu-latest) (pull_request) Failing after 1s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Failing after 1s
Tests / test (pull_request) Failing after 4s
Tests / e2e (pull_request) Failing after 2s
Nix / nix (macos-latest) (pull_request) Has been cancelled
[EPIC-999] Phase II — The Forge: claw_runtime scaffold + forge pipeline
- agent/claw_runtime.py: 5-class decomposition of AIAgent (ConversationLoop, ModelDispatcher, ToolExecutor, MemoryInterceptor, PromptBuilder)
- scripts/forge.py: competing sub-agent rewrite pipeline with Arbiter scoring

Both are facades today; logic migrates incrementally from run_agent.py.

Authored-by: Ezra
2026-04-05 23:32:53 +00:00

192 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""
forge.py — The Forge: competing sub-agent rewrite pipeline.
Part of EPIC-999 Phase II.
Given a target module, spawn N sub-agents to rewrite it independently.
An Arbiter evaluates each candidate on:
1. Test pass rate
2. SLOC reduction (or bounded increase)
3. Cyclomatic complexity
4. API surface stability (diff against original public interface)
The winner is promoted to the integration branch.
"""
import argparse
import json
import os
import re
import subprocess
import sys
import tempfile
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Dict
@dataclass
class RewriteCandidate:
    """One competing rewrite of the target module, plus its evaluation results.

    The first four fields identify the candidate and must be supplied by the
    caller. The remaining fields start at neutral defaults and are filled in
    when the candidate is evaluated.
    """

    # --- identity / workspace (required) ---
    agent_name: str      # name of the sub-agent that produced this rewrite
    branch_name: str     # branch-style label for this candidate
    module_path: Path    # path of the target module inside the candidate workspace
    temp_dir: Path       # root directory of the candidate workspace

    # --- evaluation metrics (populated later) ---
    test_pass_rate: float = 0.0
    sloc_delta: int = 0
    complexity_score: float = 0.0
    api_surface_score: float = 0.0
    total_score: float = 0.0

    # Free-form progress/diagnostic messages; a fresh list per instance.
    logs: List[str] = field(default_factory=list)
class ForgePipeline:
    """Competing rewrite pipeline for clean-room module evolution.

    Typical flow: ``run_rewrites()`` creates one workspace per agent,
    ``arbitrate()`` evaluates every candidate and picks the highest score,
    and ``promote_winner()`` records the result.

    NOTE: ``work_dir`` is created with ``tempfile.mkdtemp`` and is never
    removed by this class; callers are responsible for cleaning it up.
    """

    # Line count above which a candidate starts losing score for bloat.
    SLOC_BUDGET = 500
    # Score deducted for every line over ``SLOC_BUDGET``.
    BLOAT_PENALTY_PER_LINE = 0.01
    # Upper bound, in seconds, for one candidate's test run.
    TEST_TIMEOUT_SECONDS = 300

    # Matches the per-outcome counts in pytest's final summary line, e.g.
    # "1 failed, 3 passed in 0.12s" or "2 passed, 1 error in 0.30s".
    _SUMMARY_COUNT_RE = re.compile(r"(\d+) (passed|failed|errors?)\b")

    def __init__(self, repo_path: Path, target_module: str, agents: List[str]):
        """Set up the pipeline and create a fresh temporary working directory.

        Args:
            repo_path: Path to the repository to rewrite (resolved to absolute).
            target_module: Module path relative to ``repo_path``.
            agents: Names of the agents that will compete.
        """
        self.repo_path = repo_path.resolve()
        self.target_module = target_module
        self.agents = agents
        self.work_dir = Path(tempfile.mkdtemp(prefix="forge_"))
        self.candidates: List["RewriteCandidate"] = []

    def _spawn_agent_rewrite(self, agent_name: str, index: int) -> "RewriteCandidate":
        """Create the workspace for a single sub-agent rewrite.

        Copies the whole repository into ``work_dir/<branch_name>/repo`` and
        writes a ``rewrite.marker`` file next to it.

        Raises:
            subprocess.CalledProcessError: if the repository copy fails.
        """
        # The timestamp plus the index keeps names unique within one run.
        branch_name = f"forge-{agent_name}-{int(time.time())}-{index}"
        candidate_dir = self.work_dir / branch_name
        candidate_dir.mkdir(parents=True, exist_ok=True)

        # Copy repo into candidate workspace
        subprocess.run(
            ["cp", "-r", str(self.repo_path), str(candidate_dir / "repo")],
            check=True,
            capture_output=True,
        )

        candidate = RewriteCandidate(
            agent_name=agent_name,
            branch_name=branch_name,
            module_path=candidate_dir / "repo" / self.target_module,
            temp_dir=candidate_dir,
        )

        # TODO: replace with actual sub-agent invocation via delegate_tool.py
        # For now, we write a marker file so the pipeline structure is testable.
        marker = candidate_dir / "rewrite.marker"
        marker.write_text(
            f"agent={agent_name}\n"
            f"target={self.target_module}\n"
            f"timestamp={time.time()}\n",
            encoding="utf-8",
        )
        candidate.logs.append(f"Spawned {agent_name} in {branch_name}")
        return candidate

    def run_rewrites(self) -> List["RewriteCandidate"]:
        """Create one rewrite candidate per agent and return all candidates.

        Candidates are spawned one after another (not concurrently) and
        appended to ``self.candidates``.
        """
        print(f"[Forge] Starting {len(self.agents)} competing rewrites for {self.target_module}")
        for idx, agent in enumerate(self.agents):
            candidate = self._spawn_agent_rewrite(agent, idx)
            self.candidates.append(candidate)
            print(f"  -> {candidate.branch_name}")
        return self.candidates

    @classmethod
    def _parse_pass_rate(cls, stdout: str, returncode: int) -> float:
        """Derive a pass rate in [0.0, 1.0] from pytest output.

        The last line of ``stdout`` that contains outcome counts is treated
        as the summary; the rate is ``passed / (passed + failed + errors)``.
        When no such line exists, the exit code decides: 0 means everything
        passed, anything else means nothing did.
        """
        for line in reversed(stdout.splitlines()):
            matches = cls._SUMMARY_COUNT_RE.findall(line)
            if not matches:
                continue
            counts = {"passed": 0, "failed": 0, "error": 0}
            for number, outcome in matches:
                # "error" and "errors" are the same bucket.
                key = "error" if outcome.startswith("error") else outcome
                counts[key] += int(number)
            total = sum(counts.values())
            return counts["passed"] / total if total else 0.0
        return 1.0 if returncode == 0 else 0.0

    def evaluate_candidate(self, candidate: "RewriteCandidate") -> "RewriteCandidate":
        """Run the test suite and metrics on a candidate, updating it in place.

        Sets ``sloc_delta``, ``test_pass_rate`` and ``total_score`` and
        appends diagnostics to ``candidate.logs``. Failures while reading the
        module or running the tests are logged rather than raised.

        Returns:
            The same ``candidate`` object.
        """
        repo = candidate.temp_dir / "repo"

        # SLOC calculation.
        # NOTE(review): despite the field name, this stores the candidate's
        # absolute line count, not a difference against the original module.
        try:
            with open(candidate.module_path, "r", encoding="utf-8") as f:
                candidate.sloc_delta = sum(1 for _ in f)
        except (OSError, UnicodeError) as e:
            candidate.logs.append(f"SLOC error: {e}")

        # Test execution (best-effort; requires venv + deps)
        # NOTE(review): "-x" stops at the first failure, so the pass rate only
        # reflects the tests that ran before that point.
        test_cmd = [
            sys.executable, "-m", "pytest",
            "tests/", "-q", "--tb=short",
            "-x",
        ]
        try:
            result = subprocess.run(
                test_cmd,
                cwd=repo,
                capture_output=True,
                text=True,
                timeout=self.TEST_TIMEOUT_SECONDS,
            )
        except (OSError, subprocess.SubprocessError, UnicodeError) as e:
            # Missing workspace, timeout, or undecodable output: score as 0.
            candidate.logs.append(f"Test error: {e}")
            candidate.test_pass_rate = 0.0
        else:
            candidate.test_pass_rate = self._parse_pass_rate(
                result.stdout, result.returncode
            )
            candidate.logs.append(f"Tests: returncode={result.returncode}")

        # Scoring (naive weighted sum; will be refined)
        bloat = max(candidate.sloc_delta - self.SLOC_BUDGET, 0)
        candidate.total_score = (
            candidate.test_pass_rate * 100.0
            - bloat * self.BLOAT_PENALTY_PER_LINE  # penalty for bloat
        )
        return candidate

    def arbitrate(self) -> "RewriteCandidate":
        """Evaluate all candidates and return the one with the highest score.

        Raises:
            ValueError: if there are no candidates to evaluate.
        """
        if not self.candidates:
            raise ValueError("no candidates to arbitrate; call run_rewrites() first")

        print("[Forge] Evaluating candidates...")
        for candidate in self.candidates:
            self.evaluate_candidate(candidate)
            print(f"  {candidate.agent_name}: tests={candidate.test_pass_rate:.2%} "
                  f"sloc={candidate.sloc_delta} score={candidate.total_score:.2f}")

        winner = max(self.candidates, key=lambda c: c.total_score)
        print(f"[Forge] Winner: {winner.agent_name} ({winner.branch_name}) "
              f"score={winner.total_score:.2f}")
        return winner

    def promote_winner(self, winner: "RewriteCandidate", integration_branch: str):
        """Record the winning candidate for promotion to the integration branch.

        Currently this only writes ``FORGE_WINNER.marker`` into the original
        repository; no git operations are performed yet.
        """
        # TODO: git checkout -b integration_branch, copy winner module, commit, push
        print(f"[Forge] Promoting {winner.branch_name} -> {integration_branch}")
        marker = self.repo_path / "FORGE_WINNER.marker"
        marker.write_text(
            f"winner={winner.agent_name}\n"
            f"branch={winner.branch_name}\n"
            f"score={winner.total_score}\n",
            encoding="utf-8",
        )
def main(argv=None):
    """Command-line entry point: run rewrites, pick a winner, promote it.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.

    Exits with argparse's usage error (status 2) when ``--repo`` is not an
    existing directory, before any temporary workspace is created.
    """
    parser = argparse.ArgumentParser(description="The Forge — competing rewrite pipeline")
    parser.add_argument("--repo", required=True, help="Path to the target repo")
    parser.add_argument("--module", required=True, help="Target module path (relative to repo)")
    parser.add_argument("--agents", nargs="+", default=["allegro", "bezalel"],
                        help="Agent names to compete")
    parser.add_argument("--integration-branch", default="forge-integration",
                        help="Branch to promote winner into")
    args = parser.parse_args(argv)

    # Fail fast with a readable message instead of a copy failure later on.
    repo_path = Path(args.repo)
    if not repo_path.is_dir():
        parser.error(f"--repo must be an existing directory: {args.repo}")

    forge = ForgePipeline(
        repo_path=repo_path,
        target_module=args.module,
        agents=args.agents,
    )
    forge.run_rewrites()
    winner = forge.arbitrate()
    forge.promote_winner(winner, args.integration_branch)


if __name__ == "__main__":
    main()