Add stuck initiatives audit report

2026-04-03 22:42:06 +00:00
parent dc3d975c2f
commit 56aa692d1c
1267 changed files with 1263232 additions and 0 deletions
--- a/protected/skills-backup/devops/local-timmy-overnight-loop/scripts/timmy_overnight_loop.py
+++ b/protected/skills-backup/devops/local-timmy-overnight-loop/scripts/timmy_overnight_loop.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""Timmy overnight tightening loop.
+
+Runs a series of small, grounded tasks against local llama-server via Hermes,
+cycling through the task list until the process is killed.
+Each task is deliberately simple: one or two tool calls max.
+Logs every result with timing data.
+
+Goal: rich data about local Timmy's tool-use capability by morning.
+
+Deploy to: ~/.timmy/scripts/timmy_overnight_loop.py
+Start with: cd ~/.hermes/hermes-agent && nohup venv/bin/python3 ~/.timmy/scripts/timmy_overnight_loop.py > ~/.timmy/overnight-loop/loop_stdout.log 2>&1 &
+"""
+
+import io
+import json
+import os
+import sys
+import time
+import traceback
+from contextlib import redirect_stderr, redirect_stdout
+from datetime import datetime
+from pathlib import Path
+
+# ── Config ──────────────────────────────────────────────────────────
+# Hermes agent checkout; run_task() puts it on sys.path before importing the agent.
+AGENT_DIR = Path.home() / ".hermes" / "hermes-agent"
+# Output directory for the JSONL run log and the Markdown summary; also the
+# target directory of the "write-observation" task.
+RESULTS_DIR = Path.home() / ".timmy" / "overnight-loop"
+# Created at import time, so merely importing this module touches the filesystem.
+RESULTS_DIR.mkdir(parents=True, exist_ok=True)
+
+# Handed to every agent as its ephemeral system prompt.
+SYSTEM_PROMPT = """You are Timmy. You run locally on llama.cpp.
+You MUST use the tools provided. Do not narrate tool calls as text.
+When asked to read a file, call the read_file tool.
+When asked to write a file, call the write_file tool.
+When asked to search, call the search_files tool.
+Be brief. Do the task. Report what you found."""
+
+# Passed to the agent as max_iterations for each task.
+MAX_TURNS_PER_TASK = 5
+# NOTE(review): not referenced anywhere in the visible code, so no per-task
+# timeout is actually enforced — confirm whether that is intended.
+TASK_TIMEOUT = 120  # seconds
+
+# ── Tasks ───────────────────────────────────────────────────────────
+TASKS = [
+    {
+        "id": "read-soul",
+        "toolsets": "file",
+        "prompt": "Read the file ~/.timmy/SOUL.md. Quote the first sentence of the Prime Directive section.",
+    },
+    {
+        "id": "read-operations",
+        "toolsets": "file",
+        "prompt": "Read the file ~/.timmy/OPERATIONS.md. How many sections does it have? List their headings.",
+    },
+    {
+        "id": "read-decisions",
+        "toolsets": "file",
+        "prompt": "Read the file ~/.timmy/decisions.md. What is the most recent decision entry? Quote its date and title.",
+    },
+    {
+        "id": "read-config",
+        "toolsets": "file",
+        "prompt": "Read the file ~/.hermes/config.yaml. What model and provider are configured as default?",
+    },
+    {
+        "id": "write-observation",
+        "toolsets": "file",
+        "prompt": "Write a file to {results_dir}/timmy_wrote_this.md with exactly this content:\n# Timmy was here\nTimestamp: {{timestamp}}\nI wrote this file using the write_file tool.\nSovereignty and service always.".format(results_dir=RESULTS_DIR),
+    },
+    {
+        "id": "search-cloud-markers",
+        "toolsets": "file",
+        "prompt": "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files contain it and on which lines.",
+    },
+    {
+        "id": "search-soul-keyword",
+        "toolsets": "file",
+        "prompt": "Search ~/.timmy/SOUL.md for the word 'sovereignty'. How many times does it appear?",
+    },
+    {
+        "id": "list-bin-scripts",
+        "toolsets": "file",
+        "prompt": "Search for files matching *.sh in ~/.hermes/bin/. List the first 10 filenames.",
+    },
+    {
+        "id": "read-and-summarize",
+        "toolsets": "file",
+        "prompt": "Read ~/.timmy/SOUL.md. In exactly one sentence, what is Timmy's position on honesty?",
+    },
+    {
+        "id": "multi-read",
+        "toolsets": "file",
+        "prompt": "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's requirement to not phone home? Answer yes or no with one sentence of evidence.",
+    },
+]
+
+
+def run_task(task, run_number):
+    """Run a single task and return result dict."""
+    task_id = task["id"]
+    prompt = task["prompt"].replace("{timestamp}", datetime.now().isoformat())
+    toolsets = task["toolsets"]
+
+    result = {
+        "task_id": task_id,
+        "run": run_number,
+        "started_at": datetime.now().isoformat(),
+        "prompt": prompt,
+        "toolsets": toolsets,
+    }
+
+    sys.path.insert(0, str(AGENT_DIR))
+    start = time.time()
+    try:
+        from hermes_cli.runtime_provider import resolve_runtime_provider
+        from run_agent import AIAgent
+
+        runtime = resolve_runtime_provider()
+
+        buf_out = io.StringIO()
+        buf_err = io.StringIO()
+
+        agent = AIAgent(
+            model=runtime.get("model", "hermes4:14b"),
+            api_key=runtime.get("api_key"),
+            base_url=runtime.get("base_url"),
+            provider=runtime.get("provider"),
+            api_mode=runtime.get("api_mode"),
+            max_iterations=MAX_TURNS_PER_TASK,
+            quiet_mode=True,
+            ephemeral_system_prompt=SYSTEM_PROMPT,
+            skip_context_files=True,
+            skip_memory=True,
+            enabled_toolsets=[toolsets] if toolsets else None,
+        )
+
+        with redirect_stdout(buf_out), redirect_stderr(buf_err):
+            conv_result = agent.run_conversation(prompt, sync_honcho=False)
+        elapsed = time.time() - start
+
+        result["elapsed_seconds"] = round(elapsed, 2)
+        result["response"] = conv_result.get("final_response", "")[:2000]
+        result["session_id"] = getattr(agent, "session_id", None)
+        result["provider"] = runtime.get("provider")
+        result["base_url"] = runtime.get("base_url")
+        result["model"] = runtime.get("model")
+        result["tool_calls_made"] = conv_result.get("tool_calls_count", 0)
+        result["status"] = "pass" if conv_result.get("final_response") else "empty"
+        result["stdout"] = buf_out.getvalue()[:500]
+        result["stderr"] = buf_err.getvalue()[:500]
+
+    except Exception as exc:
+        elapsed = time.time() - start
+        result["elapsed_seconds"] = round(elapsed, 2)
+        result["status"] = "error"
+        result["error"] = str(exc)
+        result["traceback"] = traceback.format_exc()[-1000:]
+
+    result["finished_at"] = datetime.now().isoformat()
+    return result
+
+
+def main():
+    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
+    log_path = RESULTS_DIR / f"overnight_run_{run_id}.jsonl"
+    summary_path = RESULTS_DIR / f"overnight_summary_{run_id}.md"
+
+    print(f"=== Timmy Overnight Loop ===")
+    print(f"Run ID: {run_id}")
+    print(f"Tasks: {len(TASKS)}")
+    print(f"Log: {log_path}")
+    print(f"Max turns per task: {MAX_TURNS_PER_TASK}")
+    print()
+
+    results = []
+    cycle = 0
+
+    while True:
+        cycle += 1
+        print(f"--- Cycle {cycle} ({datetime.now().strftime('%H:%M:%S')}) ---")
+
+        for task in TASKS:
+            task_id = task["id"]
+            print(f"  [{task_id}] ", end="", flush=True)
+
+            result = run_task(task, cycle)
+            results.append(result)
+
+            with open(log_path, "a") as f:
+                f.write(json.dumps(result) + "\n")
+
+            status = result["status"]
+            elapsed = result.get("elapsed_seconds", "?")
+            print(f"{status} ({elapsed}s)")
+
+            time.sleep(2)
+
+        # Write summary
+        passes = sum(1 for r in results if r["status"] == "pass")
+        errors = sum(1 for r in results if r["status"] == "error")
+        empties = sum(1 for r in results if r["status"] == "empty")
+        total = len(results)
+        avg_time = sum(r.get("elapsed_seconds", 0) for r in results) / max(total, 1)
+
+        summary = f"""# Timmy Overnight Loop — Summary
+Run ID: {run_id}
+Generated: {datetime.now().isoformat()}
+Cycles completed: {cycle}
+Total tasks run: {total}
+
+## Aggregate
+- Pass: {passes}/{total} ({100*passes//max(total,1)}%)
+- Empty: {empties}/{total}
+- Error: {errors}/{total}
+- Avg response time: {avg_time:.1f}s
+
+## Per-task results (latest cycle)
+"""
+        cycle_results = [r for r in results if r["run"] == cycle]
+        for r in cycle_results:
+            resp_preview = r.get("response", "")[:100].replace("\n", " ")
+            summary += f"- **{r['task_id']}**: {r['status']} ({r.get('elapsed_seconds','?')}s) — {resp_preview}\n"
+
+        summary += f"\n## Error details\n"
+        for r in results:
+            if r["status"] == "error":
+                summary += f"- {r['task_id']} (cycle {r['run']}): {r.get('error','?')}\n"
+
+        with open(summary_path, "w") as f:
+            f.write(summary)
+
+        print(f"\n  Cycle {cycle} done. Pass={passes} Error={errors} Empty={empties} Avg={avg_time:.1f}s")
+        print(f"  Summary: {summary_path}")
+        print(f"  Sleeping 30s before next cycle...\n")
+        time.sleep(30)
+
+
+# Start the loop only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()