fix: integrate token tracker with orchestrator (#634 )

- Add log_token_usage() to record input/output tokens per task - Add Huey SIGNAL_COMPLETE hook for automatic logging - Auto-detect pipeline name from task function name - Append to ~/.hermes/token_usage.jsonl for downstream analysis Related: #622 (token budget tracker)
2026-04-15 01:12:08 +00:00
8 changed files with 49 additions and 308 deletions
--- a/bin/nostr-agent-demo.py
+++ b/bin/nostr-agent-demo.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 """
 Full Nostr agent-to-agent communication demo - FINAL WORKING
 """
--- a/bin/preflight-provider-check.py
+++ b/bin/preflight-provider-check.py
@@ -1,271 +0,0 @@
-#!/usr/bin/env python3
-"""
-Pre-Flight Provider Check Script
-Issue #508: [Robustness] Credential drain detection — provider health checks
-
-Pre-flight check before session launch: verifies provider credentials and balance.
-
-Usage:
-  python3 preflight-provider-check.py              # Check all providers
-  python3 preflight-provider-check.py --launch      # Check and return exit code
-  python3 preflight-provider-check.py --balance     # Check OpenRouter balance
-"""
-
-import os, sys, json, yaml, urllib.request
-from datetime import datetime, timezone
-from pathlib import Path
-
-# Configuration
-HERMES_HOME = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
-LOG_DIR = Path.home() / ".local" / "timmy" / "fleet-health"
-LOG_FILE = LOG_DIR / "preflight-check.log"
-
-def log(msg):
-    """Log message to file and optionally console."""
-    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
-    log_entry = "[" + timestamp + "] " + msg
-    
-    LOG_DIR.mkdir(parents=True, exist_ok=True)
-    with open(LOG_FILE, "a") as f:
-        f.write(log_entry + "\n")
-    
-    if "--quiet" not in sys.argv:
-        print(log_entry)
-
-def get_provider_api_key(provider):
-    """Get API key for a provider from .env or environment."""
-    env_file = HERMES_HOME / ".env"
-    if env_file.exists():
-        with open(env_file) as f:
-            for line in f:
-                line = line.strip()
-                if line.startswith(provider.upper() + "_API_KEY="):
-                    return line.split("=", 1)[1].strip().strip("'\"")
-    
-    return os.environ.get(provider.upper() + "_API_KEY")
-
-def check_openrouter_balance(api_key):
-    """Check OpenRouter balance via /api/v1/auth/key."""
-    if not api_key:
-        return False, "No API key", 0
-    
-    try:
-        req = urllib.request.Request(
-            "https://openrouter.ai/api/v1/auth/key",
-            headers={"Authorization": "Bearer " + api_key}
-        )
-        resp = urllib.request.urlopen(req, timeout=10)
-        data = json.loads(resp.read())
-        
-        # Check for credits
-        credits = data.get("data", {}).get("limit", 0)
-        usage = data.get("data", {}).get("usage", 0)
-        remaining = credits - usage if credits else None
-        
-        if remaining is not None and remaining <= 0:
-            return False, "No credits remaining", 0
-        elif remaining is not None:
-            return True, "Credits available", remaining
-        else:
-            return True, "Unlimited or unknown balance", None
-    
-    except urllib.error.HTTPError as e:
-        if e.code == 401:
-            return False, "Invalid API key", 0
-        else:
-            return False, "HTTP " + str(e.code), 0
-    except Exception as e:
-        return False, str(e)[:100], 0
-
-def check_nous_key(api_key):
-    """Check Nous API key with minimal test call."""
-    if not api_key:
-        return False, "No API key"
-    
-    try:
-        req = urllib.request.Request(
-            "https://inference.nousresearch.com/v1/models",
-            headers={"Authorization": "Bearer " + api_key}
-        )
-        resp = urllib.request.urlopen(req, timeout=10)
-        
-        if resp.status == 200:
-            return True, "Valid key"
-        else:
-            return False, "HTTP " + str(resp.status)
-    
-    except urllib.error.HTTPError as e:
-        if e.code == 401:
-            return False, "Invalid API key"
-        elif e.code == 403:
-            return False, "Forbidden"
-        else:
-            return False, "HTTP " + str(e.code)
-    except Exception as e:
-        return False, str(e)[:100]
-
-def check_anthropic_key(api_key):
-    """Check Anthropic API key with minimal test call."""
-    if not api_key:
-        return False, "No API key"
-    
-    try:
-        req = urllib.request.Request(
-            "https://api.anthropic.com/v1/models",
-            headers={
-                "x-api-key": api_key,
-                "anthropic-version": "2023-06-01"
-            }
-        )
-        resp = urllib.request.urlopen(req, timeout=10)
-        
-        if resp.status == 200:
-            return True, "Valid key"
-        else:
-            return False, "HTTP " + str(resp.status)
-    
-    except urllib.error.HTTPError as e:
-        if e.code == 401:
-            return False, "Invalid API key"
-        elif e.code == 403:
-            return False, "Forbidden"
-        else:
-            return False, "HTTP " + str(e.code)
-    except Exception as e:
-        return False, str(e)[:100]
-
-def check_ollama():
-    """Check if Ollama is running."""
-    try:
-        req = urllib.request.Request("http://localhost:11434/api/tags")
-        resp = urllib.request.urlopen(req, timeout=5)
-        
-        if resp.status == 200:
-            data = json.loads(resp.read())
-            models = data.get("models", [])
-            return True, str(len(models)) + " models loaded"
-        else:
-            return False, "HTTP " + str(resp.status)
-    
-    except Exception as e:
-        return False, str(e)[:100]
-
-def get_configured_provider():
-    """Get the configured provider from global config."""
-    config_file = HERMES_HOME / "config.yaml"
-    if not config_file.exists():
-        return None
-    
-    try:
-        with open(config_file) as f:
-            config = yaml.safe_load(f)
-        
-        model_config = config.get("model", {})
-        if isinstance(model_config, dict):
-            return model_config.get("provider")
-    except:
-        pass
-    
-    return None
-
-def run_preflight_check():
-    """Run pre-flight check on all providers."""
-    log("=== Pre-Flight Provider Check ===")
-    
-    results = {}
-    
-    # Check OpenRouter
-    or_key = get_provider_api_key("openrouter")
-    or_ok, or_msg, or_balance = check_openrouter_balance(or_key)
-    results["openrouter"] = {"healthy": or_ok, "message": or_msg, "balance": or_balance}
-    
-    # Check Nous
-    nous_key = get_provider_api_key("nous")
-    nous_ok, nous_msg = check_nous_key(nous_key)
-    results["nous"] = {"healthy": nous_ok, "message": nous_msg}
-    
-    # Check Anthropic
-    anthropic_key = get_provider_api_key("anthropic")
-    anthropic_ok, anthropic_msg = check_anthropic_key(anthropic_key)
-    results["anthropic"] = {"healthy": anthropic_ok, "message": anthropic_msg}
-    
-    # Check Ollama
-    ollama_ok, ollama_msg = check_ollama()
-    results["ollama"] = {"healthy": ollama_ok, "message": ollama_msg}
-    
-    # Get configured provider
-    configured = get_configured_provider()
-    
-    # Summary
-    healthy_count = sum(1 for r in results.values() if r["healthy"])
-    total_count = len(results)
-    
-    log("Results: " + str(healthy_count) + "/" + str(total_count) + " providers healthy")
-    
-    for provider, result in results.items():
-        status = "HEALTHY" if result["healthy"] else "UNHEALTHY"
-        extra = ""
-        if provider == "openrouter" and result.get("balance") is not None:
-            extra = " (balance: " + str(result["balance"]) + ")"
-        
-        log("  " + provider + ": " + status + " - " + result["message"] + extra)
-    
-    if configured:
-        log("Configured provider: " + configured)
-        if configured in results and not results[configured]["healthy"]:
-            log("WARNING: Configured provider " + configured + " is UNHEALTHY!")
-    
-    return results, configured
-
-def check_launch_readiness():
-    """Check if we're ready to launch sessions."""
-    results, configured = run_preflight_check()
-    
-    # Check if configured provider is healthy
-    if configured and configured in results:
-        if not results[configured]["healthy"]:
-            log("LAUNCH BLOCKED: Configured provider " + configured + " is unhealthy")
-            return False, configured + " is unhealthy"
-    
-    # Check if at least one provider is healthy
-    healthy_providers = [p for p, r in results.items() if r["healthy"]]
-    if not healthy_providers:
-        log("LAUNCH BLOCKED: No healthy providers available")
-        return False, "No healthy providers"
-    
-    log("LAUNCH READY: " + str(len(healthy_providers)) + " healthy providers available")
-    return True, "Ready"
-
-def show_balance():
-    """Show OpenRouter balance."""
-    api_key = get_provider_api_key("openrouter")
-    if not api_key:
-        print("No OpenRouter API key found")
-        return
-    
-    ok, msg, balance = check_openrouter_balance(api_key)
-    
-    if ok:
-        if balance is not None:
-            print("OpenRouter balance: " + str(balance) + " credits")
-        else:
-            print("OpenRouter: " + msg)
-    else:
-        print("OpenRouter: " + msg)
-
-def main():
-    if "--balance" in sys.argv:
-        show_balance()
-    elif "--launch" in sys.argv:
-        ready, message = check_launch_readiness()
-        if ready:
-            print("READY")
-            sys.exit(0)
-        else:
-            print("BLOCKED: " + message)
-            sys.exit(1)
-    else:
-        run_preflight_check()
-
-if __name__ == "__main__":
-    main()
--- a/bin/soul_eval_gate.py
+++ b/bin/soul_eval_gate.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 """
 Soul Eval Gate — The Conscience of the Training Pipeline

--- a/cron/jobs.json
+++ b/cron/jobs.json
@@ -196,37 +196,7 @@
      "paused_reason": null,
      "skills": [],
      "skill": null
-    },
-    {
-      "id": "tmux-supervisor-513",
-      "name": "Autonomous Cron Supervisor",
-      "prompt": "Load the tmux-supervisor skill and execute the monitoring protocol.\n\nCheck both `dev` and `timmy` tmux sessions for idle panes. Only send Telegram notifications on actionable events (idle, overflow, failure). Be silent when all agents are working.\n\nSteps:\n1. List all tmux sessions (skip 'Alexander')\n2. For each session, list windows and panes\n3. Capture each pane and classify state (idle vs active)\n4. For idle panes: read context, craft context-aware prompt\n5. Send /queue prompts to idle panes\n6. Verify prompts landed\n7. Only notify via Telegram if:\n   - A pane was prompted (idle detected)\n   - A pane shows context overflow (>80%)\n   - A pane is stuck or crashed\n8. If all panes are active: respond with [SILENT]",
-      "schedule": {
-        "kind": "interval",
-        "minutes": 7,
-        "display": "every 7m"
-      },
-      "schedule_display": "every 7m",
-      "repeat": {
-        "times": null,
-        "completed": 0
-      },
-      "enabled": true,
-      "created_at": "2026-04-15T03:00:00.000000+00:00",
-      "next_run_at": null,
-      "last_run_at": null,
-      "last_status": null,
-      "last_error": null,
-      "deliver": "telegram",
-      "origin": null,
-      "state": "scheduled",
-      "paused_at": null,
-      "paused_reason": null,
-      "skills": [
-        "tmux-supervisor"
-      ],
-      "skill": "tmux-supervisor"
    }
  ],
  "updated_at": "2026-04-13T02:00:00+00:00"
-}
+}
--- a/orchestration.py
+++ b/orchestration.py
@@ -1,6 +1,53 @@
 """Sovereign orchestration — Huey replaces 3,843 lines of homebrew."""

-from huey import SqliteHuey, crontab
+import json
+import os
+from datetime import datetime, timezone
 from pathlib import Path

+from huey import SqliteHuey, signals
+
 huey = SqliteHuey(filename=str(Path.home() / ".hermes" / "orchestration.db"))
+
+# === Token Tracking ===
+TOKEN_LOG = Path.home() / ".hermes" / "token_usage.jsonl"
+
+
+def log_token_usage(task_name, result):
+    """Log token usage from a completed pipeline task.
+    
+    Reads input_tokens/output_tokens from the agent result dict.
+    Auto-detects pipeline name from task context.
+    Appends to JSONL for downstream analysis.
+    """
+    if not isinstance(result, dict):
+        return
+    
+    input_tokens = result.get("input_tokens", 0)
+    output_tokens = result.get("output_tokens", 0)
+    
+    if input_tokens == 0 and output_tokens == 0:
+        return
+    
+    # Auto-detect pipeline name from task function name
+    pipeline = task_name.replace("_task", "").replace("_", "-")
+    
+    entry = {
+        "timestamp": datetime.now(timezone.utc).isoformat(),
+        "pipeline": pipeline,
+        "input_tokens": input_tokens,
+        "output_tokens": output_tokens,
+        "total_tokens": input_tokens + output_tokens,
+        "task": task_name,
+    }
+    
+    TOKEN_LOG.parent.mkdir(parents=True, exist_ok=True)
+    with open(TOKEN_LOG, "a") as f:
+        f.write(json.dumps(entry) + "\n")
+
+
+@huey.signal(signals.SIGNAL_COMPLETE)
+def on_task_complete(signal, task, task_value=None, **kwargs):
+    """Huey hook: log token usage after each pipeline task completes."""
+    task_name = getattr(task, "name", "unknown")
+    log_token_usage(task_name, task_value)
--- a/scripts/captcha_bypass_handler.py
+++ b/scripts/captcha_bypass_handler.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 import json
 from hermes_tools import browser_navigate, browser_vision

--- a/scripts/diagram_meaning_extractor.py
+++ b/scripts/diagram_meaning_extractor.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 import json
 from hermes_tools import browser_navigate, browser_vision

--- a/scripts/visual_pr_reviewer.py
+++ b/scripts/visual_pr_reviewer.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 import json
 from hermes_tools import browser_navigate, browser_vision