"""Sovereign orchestration — Huey replaces 3,843 lines of homebrew.""" import json import logging import os import subprocess from datetime import datetime, timezone from pathlib import Path from huey import SqliteHuey, signals logger = logging.getLogger("orchestration") huey = SqliteHuey( filename=str(Path.home() / ".hermes" / "orchestration.db"), results=True, ) # === Token Tracking === TOKEN_LOG = Path.home() / ".hermes" / "token_usage.jsonl" def log_token_usage(task_name, result): """Log token usage from a completed pipeline task. Reads input_tokens/output_tokens from the agent result dict. Auto-detects pipeline name from task context. Appends to JSONL for downstream analysis. Also records to token_budget for daily enforcement. """ if not isinstance(result, dict): return input_tokens = result.get("input_tokens", 0) output_tokens = result.get("output_tokens", 0) if input_tokens == 0 and output_tokens == 0: return # Auto-detect pipeline name from task function name pipeline = task_name.replace("_task", "").replace("_", "-") entry = { "timestamp": datetime.now(timezone.utc).isoformat(), "pipeline": pipeline, "input_tokens": input_tokens, "output_tokens": output_tokens, "total_tokens": input_tokens + output_tokens, "task": task_name, } # Write to JSONL log TOKEN_LOG.parent.mkdir(parents=True, exist_ok=True) with open(TOKEN_LOG, "a") as f: f.write(json.dumps(entry) + "\n") # Record to token budget for daily enforcement try: from scripts.token_budget import record_usage record_usage(pipeline, input_tokens, output_tokens) logger.info(f"Budget updated: {pipeline} +{entry['total_tokens']} tokens") except ImportError: logger.debug("token_budget not available, skipping budget update") def check_budget(pipeline: str, estimated_tokens: int) -> bool: """Check if there's enough budget for a pipeline run.""" try: from scripts.token_budget import can_afford, get_remaining remaining = get_remaining() if not can_afford(estimated_tokens): logger.warning( f"Budget exhausted for {pipeline}: need {estimated_tokens}, " f"have {remaining}" ) return False return True except ImportError: return True # No budget module = no enforcement @huey.signal(signals.SIGNAL_COMPLETE) def on_task_complete(signal, task, task_value=None, **kwargs): """Huey hook: log token usage after each pipeline task completes.""" task_name = getattr(task, "name", "unknown") log_token_usage(task_name, task_value) # === Pipeline Tasks === @huey.task() def playground_factory_task(max_tokens: int = 100000): """Generate training data pairs from session transcripts.""" script = Path(__file__).parent / "scripts" / "pipeline_playground_factory.sh" return _run_pipeline("playground-factory", str(script), max_tokens) @huey.task() def training_factory_task(max_tokens: int = 150000): """Run model fine-tuning with generated training data.""" script = Path(__file__).parent / "scripts" / "pipeline_training_factory.sh" return _run_pipeline("training-factory", str(script), max_tokens) @huey.task() def knowledge_mine_task(max_tokens: int = 80000): """Extract structured knowledge from session archives.""" script = Path(__file__).parent / "scripts" / "pipeline_knowledge_mine.sh" return _run_pipeline("knowledge-mine", str(script), max_tokens) @huey.task() def adversary_task(max_tokens: int = 50000): """Run adversarial red-team prompts against fleet models.""" script = Path(__file__).parent / "scripts" / "pipeline_adversary.sh" return _run_pipeline("adversary", str(script), max_tokens) @huey.task() def codebase_genome_task(max_tokens: int = 120000): """Generate GENOME.md for one Gitea repo per run.""" script = Path(__file__).parent / "scripts" / "pipeline_codebase_genome.sh" return _run_pipeline("codebase-genome", str(script), max_tokens) # === Pipeline Runner === def _run_pipeline(name: str, script: str, max_tokens: int) -> dict: """Run a pipeline script and return structured result with token counts.""" if not check_budget(name, max_tokens): return { "pipeline": name, "status": "skipped", "reason": "budget_exhausted", "input_tokens": 0, "output_tokens": 0, } if not os.path.isfile(script): logger.error(f"Pipeline script not found: {script}") return { "pipeline": name, "status": "failed", "reason": "script_not_found", "input_tokens": 0, "output_tokens": 0, } logger.info(f"Starting pipeline: {name} (max_tokens={max_tokens})") try: result = subprocess.run( ["bash", script, "--max-tokens", str(max_tokens)], capture_output=True, text=True, timeout=3600, # 1 hour max per pipeline ) # Parse token usage from script output input_tokens = 0 output_tokens = 0 for line in result.stdout.split("\n"): if "tokens used" in line.lower(): try: # Extract number from lines like "... 5000 tokens used." parts = line.split() for i, p in enumerate(parts): if p.isdigit() and i + 1 < len(parts) and "token" in parts[i+1].lower(): output_tokens = int(p) break except (ValueError, IndexError): pass return { "pipeline": name, "status": "success" if result.returncode == 0 else "failed", "exit_code": result.returncode, "input_tokens": input_tokens, "output_tokens": output_tokens, "stdout_tail": result.stdout[-500:] if result.stdout else "", "stderr_tail": result.stderr[-500:] if result.stderr else "", } except subprocess.TimeoutExpired: logger.error(f"Pipeline {name} timed out after 3600s") return { "pipeline": name, "status": "timeout", "input_tokens": 0, "output_tokens": 0, } except Exception as e: logger.error(f"Pipeline {name} error: {e}") return { "pipeline": name, "status": "error", "reason": str(e), "input_tokens": 0, "output_tokens": 0, } # === Scheduler Interface === def schedule_nightly(): """Schedule all pipelines in priority order with dependencies. Called by the nightly scheduler or manually. Playground and training start in parallel; others follow dependency chain. """ # Phase 1: playground + training (no deps, parallel) playground_factory_task(max_tokens=100000) training_factory_task(max_tokens=150000) # Phase 2: knowledge-mine (depends on training) knowledge_mine_task(max_tokens=80000).then(adversary_task, max_tokens=50000) # Phase 3: codebase-genome (independent) codebase_genome_task(max_tokens=120000) logger.info("Nightly pipeline schedule dispatched")