#!/usr/bin/env python3
"""Timmy overnight tightening loop.

Runs a series of small, grounded tasks against local llama-server via Hermes.
Each task is deliberately simple: one or two tool calls max. Logs every
result with timing data.

Goal: rich data about local Timmy's tool-use capability by morning.
"""

import io
import json
import os
import sys
import time
import traceback
from contextlib import redirect_stderr, redirect_stdout
from datetime import datetime
from pathlib import Path

# ── Config ──────────────────────────────────────────────────────────
AGENT_DIR = Path.home() / ".hermes" / "hermes-agent"
RESULTS_DIR = Path.home() / ".timmy" / "overnight-loop"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

SYSTEM_PROMPT = """You are Timmy. You run locally on llama.cpp.
You MUST use the tools provided. Do not narrate tool calls as text.
When asked to read a file, call the read_file tool.
When asked to write a file, call the write_file tool.
When asked to search, call the search_files tool.
Be brief. Do the task. Report what you found."""

MAX_TURNS_PER_TASK = 5
# NOTE(review): TASK_TIMEOUT is declared but never enforced — no code path
# aborts a task after 120s. Confirm whether a watchdog was intended.
TASK_TIMEOUT = 120  # seconds

# ── Tasks ───────────────────────────────────────────────────────────
# Each task: "id" (stable label for logs), "toolsets" (single toolset name
# passed to the agent), "prompt" (user message; "{timestamp}" is substituted
# at run time by run_task).
TASKS = [
    {
        "id": "read-soul",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/SOUL.md. Quote the first sentence of the Prime Directive section.",
    },
    {
        "id": "read-operations",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/OPERATIONS.md. How many sections does it have? List their headings.",
    },
    {
        "id": "read-decisions",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/decisions.md. What is the most recent decision entry? Quote its date and title.",
    },
    {
        "id": "read-config",
        "toolsets": "file",
        "prompt": "Read the file ~/.hermes/config.yaml. What model and provider are configured as default?",
    },
    {
        "id": "write-observation",
        "toolsets": "file",
        # f-string interpolates RESULTS_DIR now; the doubled braces leave a
        # literal "{timestamp}" placeholder for run_task to fill per run.
        "prompt": f"Write a file to {RESULTS_DIR}/timmy_wrote_this.md with exactly this content:\n# Timmy was here\nTimestamp: {{timestamp}}\nI wrote this file using the write_file tool.\nSovereignty and service always.",
    },
    {
        "id": "search-cloud-markers",
        "toolsets": "file",
        "prompt": "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files contain it and on which lines.",
    },
    {
        "id": "search-soul-keyword",
        "toolsets": "file",
        "prompt": "Search ~/.timmy/SOUL.md for the word 'sovereignty'. How many times does it appear?",
    },
    {
        "id": "list-bin-scripts",
        "toolsets": "file",
        "prompt": "Search for files matching *.sh in ~/.hermes/bin/. List the first 10 filenames.",
    },
    {
        "id": "read-and-summarize",
        "toolsets": "file",
        "prompt": "Read ~/.timmy/SOUL.md. In exactly one sentence, what is Timmy's position on honesty?",
    },
    {
        "id": "multi-read",
        "toolsets": "file",
        "prompt": "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's requirement to not phone home? Answer yes or no with one sentence of evidence.",
    },
]


def run_task(task: dict, run_number: int) -> dict:
    """Run a single task against the local agent and return a result dict.

    The returned dict always contains "task_id", "run", "started_at",
    "prompt", "toolsets", "finished_at", and a "status" of "pass", "empty",
    or "error". Timing, response preview, and provider metadata are added
    on success; "error"/"traceback" on failure. Never raises: any exception
    (including import failures of the Hermes agent) is captured in the
    result so the overnight loop keeps going.
    """
    task_id = task["id"]
    prompt = task["prompt"].replace("{timestamp}", datetime.now().isoformat())
    toolsets = task["toolsets"]
    result = {
        "task_id": task_id,
        "run": run_number,
        "started_at": datetime.now().isoformat(),
        "prompt": prompt,
        "toolsets": toolsets,
    }
    # Guard the insert: run_task is called in an endless loop, and an
    # unconditional insert would grow sys.path with duplicates every call.
    agent_path = str(AGENT_DIR)
    if agent_path not in sys.path:
        sys.path.insert(0, agent_path)
    # Start the clock before the try so the except branch can always
    # compute an elapsed time (the old `'start' in dir()` hack is gone).
    start = time.time()
    try:
        from hermes_cli.runtime_provider import resolve_runtime_provider
        from run_agent import AIAgent

        runtime = resolve_runtime_provider()
        buf_out = io.StringIO()
        buf_err = io.StringIO()
        agent = AIAgent(
            model=runtime.get("model", "hermes4:14b"),
            api_key=runtime.get("api_key"),
            base_url=runtime.get("base_url"),
            provider=runtime.get("provider"),
            api_mode=runtime.get("api_mode"),
            max_iterations=MAX_TURNS_PER_TASK,
            quiet_mode=True,
            ephemeral_system_prompt=SYSTEM_PROMPT,
            skip_context_files=True,
            skip_memory=True,
            enabled_toolsets=[toolsets] if toolsets else None,
        )
        start = time.time()  # re-arm: measure only the conversation itself
        # Capture the agent's console chatter so this script's own progress
        # output stays clean; previews of it are logged below.
        with redirect_stdout(buf_out), redirect_stderr(buf_err):
            conv_result = agent.run_conversation(prompt, sync_honcho=False)
        elapsed = time.time() - start
        result["elapsed_seconds"] = round(elapsed, 2)
        result["response"] = conv_result.get("final_response", "")[:2000]
        result["session_id"] = getattr(agent, "session_id", None)
        result["provider"] = runtime.get("provider")
        result["base_url"] = runtime.get("base_url")
        result["model"] = runtime.get("model")
        result["tool_calls_made"] = conv_result.get("tool_calls_count", 0)
        # "empty" means the agent returned no final text — distinct from an error.
        result["status"] = "pass" if conv_result.get("final_response") else "empty"
        result["stdout"] = buf_out.getvalue()[:500]
        result["stderr"] = buf_err.getvalue()[:500]
    except Exception as exc:
        # Broad catch is deliberate: this is the loop's fault boundary.
        result["elapsed_seconds"] = round(time.time() - start, 2)
        result["status"] = "error"
        result["error"] = str(exc)
        result["traceback"] = traceback.format_exc()[-1000:]
    result["finished_at"] = datetime.now().isoformat()
    return result


def main():
    """Run all TASKS in an endless cycle, logging JSONL + a markdown summary.

    Appends one JSON line per task result to overnight_run_<id>.jsonl and
    rewrites overnight_summary_<id>.md after every cycle. Runs until killed.
    """
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = RESULTS_DIR / f"overnight_run_{run_id}.jsonl"
    summary_path = RESULTS_DIR / f"overnight_summary_{run_id}.md"

    print("=== Timmy Overnight Loop ===")
    print(f"Run ID: {run_id}")
    print(f"Tasks: {len(TASKS)}")
    print(f"Log: {log_path}")
    print(f"Max turns per task: {MAX_TURNS_PER_TASK}")
    print()

    results = []
    cycle = 0
    # Run continuously until killed
    while True:
        cycle += 1
        print(f"--- Cycle {cycle} ({datetime.now().strftime('%H:%M:%S')}) ---")
        for task in TASKS:
            task_id = task["id"]
            print(f"  [{task_id}] ", end="", flush=True)
            result = run_task(task, cycle)
            results.append(result)
            # Append to JSONL log immediately so a crash loses nothing.
            with open(log_path, "a") as f:
                f.write(json.dumps(result) + "\n")
            status = result["status"]
            elapsed = result.get("elapsed_seconds", "?")
            print(f"{status} ({elapsed}s)")
            # Brief pause between tasks
            time.sleep(2)

        # Write summary after each cycle (aggregates cover ALL cycles so far).
        passes = sum(1 for r in results if r["status"] == "pass")
        errors = sum(1 for r in results if r["status"] == "error")
        empties = sum(1 for r in results if r["status"] == "empty")
        total = len(results)
        avg_time = sum(r.get("elapsed_seconds", 0) for r in results) / max(total, 1)

        summary = f"""# Timmy Overnight Loop — Summary
Run ID: {run_id}
Generated: {datetime.now().isoformat()}
Cycles completed: {cycle}
Total tasks run: {total}

## Aggregate
- Pass: {passes}/{total} ({100*passes//max(total,1)}%)
- Empty: {empties}/{total}
- Error: {errors}/{total}
- Avg response time: {avg_time:.1f}s

## Per-task results (latest cycle)
"""
        cycle_results = [r for r in results if r["run"] == cycle]
        for r in cycle_results:
            resp_preview = r.get("response", "")[:100].replace("\n", " ")
            summary += f"- **{r['task_id']}**: {r['status']} ({r.get('elapsed_seconds','?')}s) — {resp_preview}\n"
        summary += f"\n## Error details\n"
        for r in results:
            if r["status"] == "error":
                summary += f"- {r['task_id']} (cycle {r['run']}): {r.get('error','?')}\n"
        with open(summary_path, "w") as f:
            f.write(summary)

        print(f"\n  Cycle {cycle} done. Pass={passes} Error={errors} Empty={empties} Avg={avg_time:.1f}s")
        print(f"  Summary: {summary_path}")
        print(f"  Sleeping 30s before next cycle...\n")
        time.sleep(30)


if __name__ == "__main__":
    main()