feat(#592): generate 1,000 code-pattern training pairs for Hermes Agent Core

Adds training data generation script and generated JSONL covering:

- Agent Loop (307): AIAgent instantiation, conversation handling, iteration budgeting, tool call loops, quiet mode
- Tool Routing (54): Registry registration, schema discovery, availability checks, toolset management, handler wrappers
- Session Management (151): FTS5 search, save/load sessions, context compression, prompt caching
- Prompt Building (77): System prompt construction, reasoning blocks, tool result formatting, few-shot examples, context truncation
- Utility (207): Config loading, credential resolution, model switching, trajectory saving, display rendering, approval validation, subagent delegation, file reading, code execution, process polling
- Error Handling (97): Rate limiting, tool error catching, JSON validation, optional deps, infinite loop detection
- Config (46): Schema migration, env var metadata, persistent values
- Testing (61): Pytest patterns, agent mocking, tmp_path fixtures

Total: 1,000 problem→solution pairs (~546KB JSONL)

Script: training/build_code_patterns_hermes_agent_core.py
Output: training-data/code-patterns-hermes-agent-core.jsonl
2026-04-22 03:16:51 -04:00
2 changed files with 1760 additions and 0 deletions
--- a/training-data/code-patterns-hermes-agent-core.jsonl
+++ b/training-data/code-patterns-hermes-agent-core.jsonl
--- a/training/build_code_patterns_hermes_agent_core.py
+++ b/training/build_code_patterns_hermes_agent_core.py
@@ -0,0 +1,760 @@
+#!/usr/bin/env python3
+"""Build 1,000 code-pattern problem→solution training pairs for issue #592.
+
+Domain: Hermes Agent Core — agent loop, tool routing, session management, prompt building.
+Output: training-data/code-patterns-hermes-agent-core.jsonl
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import itertools
+import random
+from pathlib import Path
+
+# Default JSONL destination: the "training-data" directory two levels above this file.
+DEFAULT_OUTPUT = Path(__file__).parent.parent / "training-data" / "code-patterns-hermes-agent-core.jsonl"
+# Issue number stamped into the "issue" field of every generated record.
+ISSUE = 592
+
+# Seed the shared RNG at import time; main() seeds it again from --seed before generating.
+random.seed(592)
+
+# ── Templates ──────────────────────────────────────────────────────
+
+# Agent-loop patterns. "problem" and "solution" are written as str.format
+# templates: {model} / {iters} are placeholders and literal braces are doubled.
+# "variations" maps each placeholder to its candidate values; expand_template()
+# emits one pair per combination of those values.
+AGENT_LOOP_TEMPLATES = [
+    {
+        "problem": "Create an AIAgent instance with model {model} and max {iters} iterations",
+        "solution": '''from run_agent import AIAgent
+
+agent = AIAgent(
+    model="{model}",
+    max_iterations={iters},
+    enabled_toolsets=["web", "terminal", "file"],
+)
+response = agent.chat("List files in current directory")
+print(response)''',
+        "variations": {
+            "model": ["anthropic/claude-sonnet-4", "openai/gpt-4o", "google/gemini-2.5-pro", "nous/hermes3:70b"],
+            "iters": [30, 50, 90],
+        },
+    },
+    {
+        "problem": "Run a full conversation with custom system message using AIAgent",
+        "solution": '''from run_agent import AIAgent
+
+agent = AIAgent(model="{model}", max_iterations={iters})
+result = agent.run_conversation(
+    user_message="Analyze this log file",
+    system_message="You are a DevOps assistant. Be concise.",
+)
+print(result["final_response"])''',
+        "variations": {
+            "model": ["anthropic/claude-sonnet-4", "openai/gpt-4o-mini"],
+            "iters": [50, 90],
+        },
+    },
+    {
+        "problem": "Handle a tool call result and append it to the conversation messages",
+        "solution": '''from model_tools import handle_function_call
+
+tool_call = response.tool_calls[0]
+result = handle_function_call(
+    tool_call.name,
+    tool_call.args,
+    task_id="task-123"
+)
+messages.append({{
+    "role": "tool",
+    "tool_call_id": tool_call.id,
+    "content": result,
+}})''',
+        "variations": {},
+    },
+    {
+        "problem": "Check iteration budget before making another API call in the agent loop",
+        "solution": '''while api_call_count < agent.max_iterations and agent.iteration_budget.remaining > 0:
+    response = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        tools=tool_schemas,
+    )
+    if response.tool_calls:
+        for tc in response.tool_calls:
+            result = handle_function_call(tc.name, tc.args)
+            messages.append(tool_result_message(result))
+        api_call_count += 1
+    else:
+        return response.content''',
+        "variations": {},
+    },
+    {
+        "problem": "Enable quiet mode on AIAgent to suppress spinner and activity feed",
+        "solution": '''from run_agent import AIAgent
+
+agent = AIAgent(
+    model="{model}",
+    quiet_mode=True,
+    save_trajectories=True,
+)
+response = agent.chat("Summarize this file")
+print(response)''',
+        "variations": {
+            "model": ["anthropic/claude-sonnet-4", "openai/gpt-4o"],
+        },
+    },
+]
+
+# Tool-routing patterns. Each solution is a snippet that imports every stdlib
+# module it uses, so the emitted code does not depend on imports the reader
+# cannot see. Literal braces are doubled (str.format escaping).
+TOOL_ROUTING_TEMPLATES = [
+    {
+        "problem": "Register a new tool with the central registry in tools/registry.py",
+        "solution": '''import json
+import os
+
+from tools.registry import registry
+
+def example_tool(param: str, task_id: str = None) -> str:
+    return json.dumps({{"success": True, "data": param}})
+
+registry.register(
+    name="example_tool",
+    toolset="example",
+    schema={{
+        "name": "example_tool",
+        "description": "Does something useful",
+        "parameters": {{
+            "type": "object",
+            "properties": {{
+                "param": {{"type": "string", "description": "Input parameter"}}
+            }},
+            "required": ["param"],
+        }},
+    }},
+    handler=lambda args, **kw: example_tool(
+        param=args.get("param", ""),
+        task_id=kw.get("task_id")
+    ),
+    check_fn=lambda: bool(os.getenv("EXAMPLE_API_KEY")),
+    requires_env=["EXAMPLE_API_KEY"],
+)''',
+        "variations": {},
+    },
+    {
+        "problem": "Discover all builtin tools and build tool schemas for the API call",
+        "solution": '''from model_tools import discover_builtin_tools
+from tools.registry import registry
+
+# Auto-discover all registered tools
+discover_builtin_tools()
+
+# Collect schemas for all available tools
+tool_schemas = [registry.get_schema(name) for name in registry.list_available()]
+
+# Filter by enabled toolsets
+enabled = ["web", "terminal", "file"]
+tool_schemas = [
+    s for s in tool_schemas
+    if registry.get_toolset(s["name"]) in enabled
+]''',
+        "variations": {},
+    },
+    {
+        "problem": "Check if a tool is available before calling it",
+        "solution": '''from tools.registry import registry
+
+tool_name = "web_search"
+if registry.is_available(tool_name):
+    schema = registry.get_schema(tool_name)
+    result = registry.call(tool_name, {{"query": "Python asyncio"}}, task_id="abc")
+else:
+    result = f"Tool {{tool_name}} is not available (missing requirements)"''',
+        "variations": {},
+    },
+    {
+        "problem": "Add a new toolset to HERMES_CORE_TOOLS in toolsets.py",
+        "solution": '''# In toolsets.py
+
+_HERMES_CORE_TOOLS = [
+    "web",
+    "terminal",
+    "file",
+    "browser",
+    "code_execution",
+    "delegate",
+    "new_toolset",  # <-- added
+]
+
+# Create tools/new_toolset_tool.py with registry.register() at module level
+# Auto-discovery will pick it up automatically — no manual import needed''',
+        "variations": {},
+    },
+    {
+        "problem": "Wrap a tool handler to add logging and error handling",
+        "solution": '''import json
+import logging
+from tools.registry import registry
+
+logger = logging.getLogger(__name__)
+
+def logged_handler(fn):
+    def wrapper(args, **kwargs):
+        task_id = kwargs.get("task_id")
+        logger.info(f"[{{task_id}}] Calling {{fn.__name__}} with {{args}}")
+        try:
+            result = fn(args, **kwargs)
+            logger.info(f"[{{task_id}}] Success")
+            return result
+        except Exception as e:
+            logger.error(f"[{{task_id}}] Error: {{e}}")
+            return json.dumps({{"error": str(e)}})
+    return wrapper
+
+# Register with wrapper (wrap once at registration, not on every call)
+registry.register(
+    name="my_tool",
+    toolset="custom",
+    schema={{...}},
+    handler=logged_handler(my_tool_impl),
+)''',
+        "variations": {},
+    },
+]
+
+# Session-management patterns. None of these entries define placeholders
+# ("variations" is empty), but literal braces in the solution text are still
+# written doubled, as in the other template lists.
+SESSION_MANAGEMENT_TEMPLATES = [
+    {
+        "problem": "Query the session database for messages matching a keyword using FTS5",
+        "solution": '''from hermes_state import SessionDB
+
+db = SessionDB()
+results = db.search_messages("error handling", limit=10)
+for row in results:
+    print(f"Session {{row['session_id']}}: {{row['content'][:100]}}")''',
+        "variations": {},
+    },
+    {
+        "problem": "Save a conversation session to SQLite with metadata",
+        "solution": '''from hermes_state import SessionDB
+import json
+
+db = SessionDB()
+session_id = "sess-abc-123"
+messages = [
+    {{"role": "user", "content": "Hello"}},
+    {{"role": "assistant", "content": "Hi there"}},
+]
+
+db.save_session(
+    session_id=session_id,
+    messages=json.dumps(messages),
+    model="claude-sonnet-4",
+    platform="cli",
+    task_id="task-456",
+)''',
+        "variations": {},
+    },
+    {
+        "problem": "List recent sessions from the session database with pagination",
+        "solution": '''from hermes_state import SessionDB
+
+db = SessionDB()
+sessions = db.list_sessions(limit=20, offset=0)
+for sess in sessions:
+    print(f"{{sess['id']}} | {{sess['created_at']}} | {{sess['message_count']}} msgs")''',
+        "variations": {},
+    },
+    {
+        "problem": "Compress old session context to stay within token budget",
+        "solution": '''from agent.context_compressor import ContextCompressor
+
+compressor = ContextCompressor(model="claude-sonnet-4")
+compressed = compressor.compress(
+    messages=messages,
+    target_tokens=4000,
+    preserve_recent=4,
+)
+messages = compressed["messages"]
+summary = compressed.get("summary", "")''',
+        "variations": {},
+    },
+    {
+        "problem": "Enable Anthropic prompt caching for long system prompts",
+        "solution": '''from agent.prompt_caching import PromptCaching
+
+cache = PromptCaching()
+system_msg = cache.prepare_system_prompt(
+    content=system_content,
+    cache_key="my-profile-v1",
+)
+
+# The system prompt will be cached across turns
+messages = [system_msg, {{"role": "user", "content": user_input}}]''',
+        "variations": {},
+    },
+]
+
+# Prompt-building patterns. Literal braces are doubled (str.format escaping).
+# Snippets import the stdlib modules they call so the emitted code does not
+# raise NameError when run on its own.
+PROMPT_BUILDING_TEMPLATES = [
+    {
+        "problem": "Build a system prompt with skills injected as slash commands",
+        "solution": '''from agent.prompt_builder import PromptBuilder
+from agent.skill_commands import scan_skills
+
+builder = PromptBuilder()
+skills = scan_skills("~/.hermes/skills/")
+
+system_prompt = builder.build(
+    base_prompt="You are a helpful coding assistant.",
+    skills=skills,
+    enabled_toolsets=["web", "terminal", "file"],
+    user_preferences={{"language": "Python", "style": "concise"}},
+)
+print(system_prompt)''',
+        "variations": {},
+    },
+    {
+        "problem": "Add a reasoning block to an assistant message for chain-of-thought",
+        "solution": '''assistant_msg = {{
+    "role": "assistant",
+    "content": "The answer is 42.",
+    "reasoning": "I calculated this by summing the factors: 1+2+3+4+6+7+12+14+21+28 = 96. Wait, let me recheck... Actually 42 is the answer to life, the universe, and everything.",
+}}
+
+messages.append(assistant_msg)''',
+        "variations": {},
+    },
+    {
+        "problem": "Format a tool result message for OpenAI-compatible chat API",
+        "solution": '''import json
+
+def tool_result_message(result: str, tool_call_id: str = "") -> dict:
+    return {{
+        "role": "tool",
+        "tool_call_id": tool_call_id,
+        "content": result if isinstance(result, str) else json.dumps(result),
+    }}
+
+messages.append(tool_result_message("42 files found", tool_call_id="call_abc"))''',
+        "variations": {},
+    },
+    {
+        "problem": "Build a few-shot prompt with examples for consistent JSON output",
+        "solution": '''system_prompt = """You are a structured data extractor.
+
+Return valid JSON only. No markdown, no explanation.
+
+Examples:
+Input: "Alice is 30 years old"
+Output: {{"name": "Alice", "age": 30}}
+
+Input: "Bob works as an engineer in Seattle"
+Output: {{"name": "Bob", "job": "engineer", "location": "Seattle"}}
+
+Now extract from the user input."""
+
+messages = [
+    {{"role": "system", "content": system_prompt}},
+    {{"role": "user", "content": "Carol is a doctor in Boston, age 45"}},
+]''',
+        "variations": {},
+    },
+    {
+        "problem": "Truncate messages to fit within model context length",
+        "solution": '''from agent.model_metadata import estimate_tokens, DEFAULT_CONTEXT_LENGTHS
+
+model = "claude-sonnet-4"
+max_ctx = DEFAULT_CONTEXT_LENGTHS.get(model, 128000)
+
+# Reserve space for response
+max_input_tokens = int(max_ctx * 0.8)
+
+# Truncate from the middle (preserve system + recent)
+total = sum(estimate_tokens(m["content"]) for m in messages)
+while total > max_input_tokens and len(messages) > 3:
+    # Remove oldest non-system message
+    for i, m in enumerate(messages):
+        if m["role"] != "system":
+            total -= estimate_tokens(m["content"])
+            messages.pop(i)
+            break''',
+        "variations": {},
+    },
+]
+
+# ── Additional generic patterns ────────────────────────────────────
+
+# General utility patterns. Entries carry no "variations" key. Literal braces
+# are doubled (str.format escaping) everywhere except the sandbox-execution
+# entry, which is written with raw single braces. Snippets import the stdlib
+# modules they call and define the objects they use.
+UTILITY_PATTERNS = [
+    {
+        "problem": "Load user config from ~/.hermes/config.yaml with defaults fallback",
+        "solution": '''from hermes_cli.config import load_cli_config, DEFAULT_CONFIG
+
+config = load_cli_config()
+model = config.get("model", DEFAULT_CONFIG["model"])
+max_iters = config.get("max_iterations", DEFAULT_CONFIG["max_iterations"])''',
+    },
+    {
+        "problem": "Resolve provider credentials from ~/.hermes/.env",
+        "solution": '''from hermes_cli.auth import resolve_credentials
+
+creds = resolve_credentials("anthropic")
+print(creds["api_key"][:8] + "...")  # masked''',
+    },
+    {
+        "problem": "Switch model mid-session with /model slash command",
+        "solution": '''# In cli.py or gateway/run.py
+from hermes_cli.model_switch import switch_model
+
+new_model = switch_model("openai/gpt-4o")
+print(f"Switched to {{new_model}}")''',
+    },
+    {
+        "problem": "Save a trajectory to disk for later training data extraction",
+        "solution": '''from agent.trajectory import save_trajectory
+import json
+
+trajectory = {{
+    "session_id": session_id,
+    "messages": messages,
+    "model": model,
+    "tools_called": [tc.name for tc in tool_calls],
+}}
+
+path = save_trajectory(trajectory, directory="~/.hermes/trajectories/")
+print(f"Saved to {{path}}")''',
+    },
+    {
+        "problem": "Render a rich markdown panel with tool call preview",
+        "solution": '''from agent.display import KawaiiSpinner, render_tool_preview
+from rich.console import Console
+from rich.panel import Panel
+
+console = Console()
+spinner = KawaiiSpinner()
+spinner.start("Calling web_search...")
+
+preview = render_tool_preview("web_search", {{"query": "Python 3.12"}})
+console.print(Panel(preview, title="Tool Call", border_style="cyan"))
+
+spinner.stop()''',
+    },
+    {
+        "problem": "Validate a dangerous command before execution using approval.py",
+        "solution": '''from tools.approval import detect_dangerous_command
+
+cmd = "rm -rf /important/data"
+result = detect_dangerous_command(cmd)
+if result["dangerous"]:
+    print(f"Approval required: {{result['reason']}}")
+    # Prompt user for approval
+else:
+    print("Safe to execute")''',
+    },
+    {
+        "problem": "Run a subagent delegation with timeout and context isolation",
+        "solution": '''from tools.delegate_tool import delegate_task
+
+result = delegate_task(
+    goal="Debug this failing test",
+    context="test_file.py line 42 raises AssertionError",
+    max_iterations=20,
+    toolsets=["terminal", "file"],
+)
+print(result["summary"])''',
+    },
+    {
+        "problem": "Read a file safely with size limits and binary detection",
+        "solution": '''from tools.file_tools import read_file
+
+content = read_file(
+    path="/tmp/large.log",
+    offset=1,
+    limit=500,
+)
+print(content)''',
+    },
+    {
+        # This entry uses raw single braces (not doubled) in its solution text.
+        "problem": "Execute Python code in sandbox with timeout and output capture",
+        "solution": '''import json
+
+from tools.code_execution_tool import execute_code
+
+result = execute_code("""
+import json
+print(json.dumps({"sum": sum(range(100))}))
+""")
+data = json.loads(result["output"])
+print(data["sum"])  # 4950''',
+    },
+    {
+        "problem": "Poll a background process for completion with progress tracking",
+        "solution": '''import time
+
+from tools.process_registry import ProcessRegistry
+
+registry = ProcessRegistry()
+session_id = registry.start("long_task.sh", background=True)
+
+while True:
+    status = registry.poll(session_id)
+    if status["done"]:
+        print(f"Completed with exit code {{status['exit_code']}}")
+        break
+    print(f"Progress: {{status['lines']}} lines output")
+    time.sleep(1)''',
+    },
+]
+
+# Error-handling patterns. Entries carry no "variations" key; literal braces
+# are doubled (str.format escaping). Snippets import the names they rely on,
+# and every role="tool" message carries the tool_call_id it answers.
+ERROR_HANDLING_PATTERNS = [
+    {
+        "problem": "Gracefully handle API rate limit with exponential backoff",
+        "solution": '''import random
+import time
+
+from openai import RateLimitError
+
+max_retries = 5
+for attempt in range(max_retries):
+    try:
+        response = client.chat.completions.create(...)
+        break
+    except RateLimitError as e:
+        wait = (2 ** attempt) + random.uniform(0, 1)
+        print(f"Rate limited. Retrying in {{wait:.1f}}s...")
+        time.sleep(wait)
+else:
+    raise Exception("Max retries exceeded")''',
+    },
+    {
+        "problem": "Catch and log tool execution errors without crashing the agent loop",
+        "solution": '''import json
+import traceback
+
+try:
+    result = handle_function_call(tool_call.name, tool_call.args)
+except Exception as e:
+    tb = traceback.format_exc()
+    result = json.dumps({{
+        "error": str(e),
+        "traceback": tb,
+    }})''',
+    },
+    {
+        "problem": "Validate JSON output from model before parsing",
+        "solution": '''import json
+
+try:
+    data = json.loads(model_output)
+except json.JSONDecodeError:
+    # Try to extract JSON from markdown code block
+    import re
+    match = re.search(r'```json\\n(.*?)\\n```', model_output, re.DOTALL)
+    if match:
+        data = json.loads(match.group(1))
+    else:
+        raise ValueError("Model did not return valid JSON")''',
+    },
+    {
+        "problem": "Handle missing optional dependencies with graceful degradation",
+        "solution": '''try:
+    import chromadb
+    HAS_CHROMADB = True
+except ImportError:
+    HAS_CHROMADB = False
+
+def search_vectors(query: str):
+    if not HAS_CHROMADB:
+        return {{"warning": "ChromaDB not installed", "results": []}}
+    # ... actual implementation''',
+    },
+    {
+        "problem": "Detect and recover from infinite tool call loops",
+        "solution": '''import json
+
+# In run_conversation loop
+seen_calls = set()
+for tool_call in response.tool_calls:
+    call_key = (tool_call.name, json.dumps(tool_call.args, sort_keys=True))
+    if call_key in seen_calls:
+        messages.append({{
+            "role": "tool",
+            "tool_call_id": tool_call.id,
+            "content": "Error: Repeated identical tool call detected. Try a different approach.",
+        }})
+        continue
+    seen_calls.add(call_key)
+    result = handle_function_call(tool_call.name, tool_call.args)
+    messages.append(tool_result_message(result, tool_call_id=tool_call.id))''',
+    },
+]
+
+# Config patterns. Entries carry no "variations" key, which expand_template()
+# treats the same as an empty mapping. Literal braces in the solution text are
+# written doubled, as in the other template lists.
+CONFIG_PATTERNS = [
+    {
+        "problem": "Bump config schema version and add migration for existing users",
+        "solution": '''# In hermes_cli/config.py
+
+DEFAULT_CONFIG = {{
+    "_config_version": 6,  # bumped from 5
+    "model": "anthropic/claude-sonnet-4",
+    "max_iterations": 50,
+    "new_feature": True,  # added
+}}
+
+def migrate_config(raw: dict) -> dict:
+    version = raw.get("_config_version", 0)
+    if version < 6:
+        raw["new_feature"] = DEFAULT_CONFIG["new_feature"]
+        raw["_config_version"] = 6
+    return raw''',
+    },
+    {
+        "problem": "Add a new .env variable with metadata for setup wizard",
+        "solution": '''# In hermes_cli/config.py
+
+OPTIONAL_ENV_VARS = {{
+    "NEW_API_KEY": {{
+        "description": "API key for new service integration",
+        "prompt": "New Service API Key",
+        "url": "https://new-service.com/api-keys",
+        "password": True,
+        "category": "tool",
+    }},
+}}''',
+    },
+    {
+        "problem": "Save a persistent config value and reload on next startup",
+        "solution": '''from hermes_cli.config import save_config_value, load_cli_config
+
+save_config_value("model", "openai/gpt-4o")
+config = load_cli_config()
+assert config["model"] == "openai/gpt-4o"''',
+    },
+]
+
+# Testing patterns. Entries carry no "variations" key; literal braces are
+# doubled (str.format escaping). Test doubles used by a snippet are defined
+# inside that snippet so it does not fail with NameError.
+TESTING_PATTERNS = [
+    {
+        "problem": "Write a pytest test for a new tool using monkeypatch",
+        "solution": '''import pytest
+from tools.web_tools import web_search
+
+def test_web_search_returns_results(monkeypatch):
+    def mock_fetch(url):
+        return "<html><body>Test result</body></html>"
+
+    monkeypatch.setattr("tools.web_tools._fetch", mock_fetch)
+    result = web_search(query="test")
+    assert "Test result" in result''',
+    },
+    {
+        "problem": "Test agent loop behavior with mocked API responses",
+        "solution": '''import pytest
+from run_agent import AIAgent
+
+class MockToolCall:
+    def __init__(self, name, args):
+        self.id = "call_test"
+        self.name = name
+        self.args = args
+
+def test_agent_runs_tool_call(monkeypatch):
+    agent = AIAgent(model="test", max_iterations=5)
+
+    class MockResponse:
+        tool_calls = [MockToolCall("read_file", {{"path": "/tmp/test.txt"}})]
+        content = None
+
+    monkeypatch.setattr(agent, "_call_api", lambda **kw: MockResponse())
+    result = agent.chat("Read the file")
+    assert result is not None''',
+    },
+    {
+        "problem": "Use tmp_path fixture for file-based tests",
+        "solution": '''import pytest
+from pathlib import Path
+
+def test_file_write_creates_file(tmp_path):
+    target = tmp_path / "output.txt"
+    target.write_text("hello")
+    assert target.exists()
+    assert target.read_text() == "hello"''',
+    },
+]
+
+# ── Assembly ───────────────────────────────────────────────────────
+
+def _render_template(text: str, subs: dict) -> str:
+    """Render one template string with ``str.format``.
+
+    With substitutions, formatting errors propagate so a template/variation
+    mismatch is not hidden. Without substitutions the text is still formatted
+    so doubled ``{{``/``}}`` escapes collapse to single braces; if the text
+    cannot be formatted (it was written with raw single braces) it is returned
+    verbatim.
+    """
+    if subs:
+        return text.format(**subs)
+    try:
+        return text.format()
+    except (KeyError, IndexError, ValueError):
+        # Raw, unescaped braces: the text is already in its final form.
+        return text
+
+
+def expand_template(template: dict) -> list[dict]:
+    """Generate one record per combination of a template's variations.
+
+    Args:
+        template: Mapping with "problem" and "solution" template strings, an
+            optional "variations" mapping (placeholder name -> list of values)
+            and an optional "domain" label.
+
+    Returns:
+        A list of records with "issue", "domain", "problem" and "solution"
+        keys. A template without variations yields exactly one record; its
+        text is rendered too, so brace escapes never leak into the output.
+    """
+    variations = template.get("variations") or {}
+    keys = list(variations)
+    domain = template.get("domain", "hermes_agent_core")
+
+    results = []
+    # product() over zero iterables yields a single empty combination, which
+    # covers the "no variations" case with one record.
+    for combo in itertools.product(*(variations[k] for k in keys)):
+        subs = dict(zip(keys, combo))
+        results.append({
+            "issue": ISSUE,
+            "domain": domain,
+            "problem": _render_template(template["problem"], subs),
+            "solution": _render_template(template["solution"], subs),
+        })
+    return results
+
+
+def build_all(target_count: int = 1000) -> list[dict]:
+    """Expand every template list and pad the result to ``target_count`` records.
+
+    Each template is tagged with its list's domain (an explicit "domain" on a
+    template wins), then expanded with expand_template(). If the expansion
+    yields fewer than ``target_count`` records, copies of randomly chosen
+    base records are appended with a "(variant N)" suffix on the problem.
+
+    Padding draws only from the base records and numbers variants per problem
+    text, so suffixes never stack and no padded record repeats another.
+
+    Args:
+        target_count: Number of records to return; values below zero are
+            treated as zero.
+
+    Returns:
+        A shuffled list of at most ``target_count`` records (empty when no
+        template produced a record). Uses the module-level ``random`` state.
+    """
+    domain_groups = [
+        ("agent_loop", AGENT_LOOP_TEMPLATES),
+        ("tool_routing", TOOL_ROUTING_TEMPLATES),
+        ("session_management", SESSION_MANAGEMENT_TEMPLATES),
+        ("prompt_building", PROMPT_BUILDING_TEMPLATES),
+        ("utility", UTILITY_PATTERNS),
+        ("error_handling", ERROR_HANDLING_PATTERNS),
+        ("config", CONFIG_PATTERNS),
+        ("testing", TESTING_PATTERNS),
+    ]
+
+    entries = []
+    for domain, templates in domain_groups:
+        for template in templates:
+            # setdefault keeps any domain a template already declares.
+            template.setdefault("domain", domain)
+            entries.extend(expand_template(template))
+
+    # A negative count would otherwise slice from the end of the list.
+    target_count = max(target_count, 0)
+
+    # Pad from the original records only; sampling the growing pool would
+    # pick earlier variants and stack "(variant N)" suffixes.
+    base_entries = list(entries)
+    variant_counts: dict[str, int] = {}
+    if base_entries:
+        while len(entries) < target_count:
+            base = random.choice(base_entries)
+            number = variant_counts.get(base["problem"], 0) + 1
+            variant_counts[base["problem"]] = number
+            variant = dict(base)
+            variant["problem"] = base["problem"] + f" (variant {number})"
+            entries.append(variant)
+
+    # Shuffle and trim
+    random.shuffle(entries)
+    return entries[:target_count]
+
+
+def main() -> None:
+    """Parse CLI options, build the pairs, write them as JSONL and print per-domain counts."""
+    cli = argparse.ArgumentParser(description="Build code-pattern training pairs for Hermes Agent Core")
+    cli.add_argument("--count", type=int, default=1000, help="Number of pairs to generate")
+    cli.add_argument("--output", type=Path, default=DEFAULT_OUTPUT, help="Output JSONL path")
+    cli.add_argument("--seed", type=int, default=592, help="Random seed")
+    opts = cli.parse_args()
+
+    # Seed before building so a given --seed always selects the same records.
+    random.seed(opts.seed)
+    pairs = build_all(target_count=opts.count)
+
+    out_path = opts.output
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as sink:
+        # One JSON object per line; non-ASCII characters are written as-is.
+        sink.writelines(json.dumps(pair, ensure_ascii=False) + "\n" for pair in pairs)
+
+    print(f"Generated {len(pairs)} training pairs → {out_path}")
+
+    # Report how many records each domain contributed, in name order.
+    from collections import Counter
+    per_domain = Counter(pair["domain"] for pair in pairs)
+    print("Domain distribution:")
+    for name in sorted(per_domain):
+        print(f"  {name}: {per_domain[name]}")
+
+
+if __name__ == "__main__":
+    main()