- Resolve decisions.md merge conflict (keep both Codex boundary + Ezra/Bezalel entries)
- Update .gitignore: protect bare secret files, exclude venvs and nexus-localhost
- Add uniwizard tools (mention watcher, adaptive prompt router, self-grader, classifiers)
- Add briefings, good-morning reports, production reports
- Add evennia world scaffold and training data
- Add angband and morrowind MCP servers
- Add diagrams, specs, test results, overnight loop scripts
- Add twitter archive insights and media metadata
- Add wizard workspaces (allegro, nahshon)
235 lines · 8.3 KiB · Python
#!/usr/bin/env python3
|
|
"""Timmy overnight tightening loop.
|
|
|
|
Runs a series of small, grounded tasks against local llama-server via Hermes.
|
|
Each task is deliberately simple: one or two tool calls max.
|
|
Logs every result with timing data.
|
|
|
|
Goal: rich data about local Timmy's tool-use capability by morning.
|
|
"""
|
|
|
|
import io
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
import traceback
|
|
from contextlib import redirect_stderr, redirect_stdout
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# ── Config ──────────────────────────────────────────────────────────
# Location of the hermes-agent checkout; run_task() adds it to sys.path
# so hermes_cli / run_agent can be imported.
AGENT_DIR = Path.home() / ".hermes" / "hermes-agent"
# All artifacts land here: JSONL logs, markdown summaries, and files the
# model is asked to write during tasks.
RESULTS_DIR = Path.home() / ".timmy" / "overnight-loop"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)  # side effect at import time

# Ephemeral system prompt injected per task (see run_task); steers the
# local model toward actual tool calls instead of narrated ones.
SYSTEM_PROMPT = """You are Timmy. You run locally on llama.cpp.
You MUST use the tools provided. Do not narrate tool calls as text.
When asked to read a file, call the read_file tool.
When asked to write a file, call the write_file tool.
When asked to search, call the search_files tool.
Be brief. Do the task. Report what you found."""

# Hard cap on agent iterations per task (passed as max_iterations).
MAX_TURNS_PER_TASK = 5
# NOTE(review): defined but never referenced anywhere in this file —
# confirm whether a per-task timeout was meant to be enforced in run_task.
TASK_TIMEOUT = 120  # seconds
|
# ── Tasks ───────────────────────────────────────────────────────────
def _file_task(task_id: str, prompt: str) -> dict:
    """Build one task spec; every task in this battery uses the "file" toolset."""
    return {"id": task_id, "toolsets": "file", "prompt": prompt}


# Deliberately small, grounded tasks (one or two tool calls each). The
# literal "{timestamp}" placeholder is substituted by run_task() at dispatch.
TASKS = [
    _file_task(
        "read-soul",
        "Read the file ~/.timmy/SOUL.md. Quote the first sentence of the Prime Directive section.",
    ),
    _file_task(
        "read-operations",
        "Read the file ~/.timmy/OPERATIONS.md. How many sections does it have? List their headings.",
    ),
    _file_task(
        "read-decisions",
        "Read the file ~/.timmy/decisions.md. What is the most recent decision entry? Quote its date and title.",
    ),
    _file_task(
        "read-config",
        "Read the file ~/.hermes/config.yaml. What model and provider are configured as default?",
    ),
    _file_task(
        "write-observation",
        f"Write a file to {RESULTS_DIR}/timmy_wrote_this.md with exactly this content:\n# Timmy was here\nTimestamp: {{timestamp}}\nI wrote this file using the write_file tool.\nSovereignty and service always.",
    ),
    _file_task(
        "search-cloud-markers",
        "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files contain it and on which lines.",
    ),
    _file_task(
        "search-soul-keyword",
        "Search ~/.timmy/SOUL.md for the word 'sovereignty'. How many times does it appear?",
    ),
    _file_task(
        "list-bin-scripts",
        "Search for files matching *.sh in ~/.hermes/bin/. List the first 10 filenames.",
    ),
    _file_task(
        "read-and-summarize",
        "Read ~/.timmy/SOUL.md. In exactly one sentence, what is Timmy's position on honesty?",
    ),
    _file_task(
        "multi-read",
        "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's requirement to not phone home? Answer yes or no with one sentence of evidence.",
    ),
]
|
|
|
|
|
|
def run_task(task: dict, run_number: int) -> dict:
    """Run a single task against the local agent and return a result dict.

    Args:
        task: Task spec with "id", "toolsets", and "prompt" keys. The
            literal substring "{timestamp}" in the prompt is replaced with
            the current ISO timestamp before dispatch.
        run_number: 1-based cycle number, recorded in the result.

    Returns:
        A JSON-serializable dict carrying timing data, a "status" of
        "pass" / "empty" / "error", and truncated captured output.
        Never raises: any failure is recorded under "error"/"traceback".
    """
    task_id = task["id"]
    prompt = task["prompt"].replace("{timestamp}", datetime.now().isoformat())
    toolsets = task["toolsets"]

    result = {
        "task_id": task_id,
        "run": run_number,
        "started_at": datetime.now().isoformat(),
        "prompt": prompt,
        "toolsets": toolsets,
    }

    # Initialize the timer before the try so the except branch can always
    # compute an elapsed figure. (The original probed `'start' in dir()`,
    # which is fragile and yielded 0 whenever setup failed early.)
    start = time.time()

    try:
        # Make the agent package importable; guard against inserting a
        # duplicate sys.path entry on every call of the overnight loop.
        agent_path = str(AGENT_DIR)
        if agent_path not in sys.path:
            sys.path.insert(0, agent_path)

        from hermes_cli.runtime_provider import resolve_runtime_provider
        from run_agent import AIAgent

        runtime = resolve_runtime_provider()

        buf_out = io.StringIO()
        buf_err = io.StringIO()

        agent = AIAgent(
            model=runtime.get("model", "hermes4:14b"),
            api_key=runtime.get("api_key"),
            base_url=runtime.get("base_url"),
            provider=runtime.get("provider"),
            api_mode=runtime.get("api_mode"),
            max_iterations=MAX_TURNS_PER_TASK,
            quiet_mode=True,
            ephemeral_system_prompt=SYSTEM_PROMPT,
            skip_context_files=True,
            skip_memory=True,
            enabled_toolsets=[toolsets] if toolsets else None,
        )

        # Restart the timer here so elapsed_seconds measures only the
        # conversation, not agent construction (matches original intent).
        start = time.time()
        with redirect_stdout(buf_out), redirect_stderr(buf_err):
            conv_result = agent.run_conversation(prompt, sync_honcho=False)
        elapsed = time.time() - start

        result["elapsed_seconds"] = round(elapsed, 2)
        result["response"] = conv_result.get("final_response", "")[:2000]
        result["session_id"] = getattr(agent, "session_id", None)
        result["provider"] = runtime.get("provider")
        result["base_url"] = runtime.get("base_url")
        result["model"] = runtime.get("model")
        result["tool_calls_made"] = conv_result.get("tool_calls_count", 0)
        result["status"] = "pass" if conv_result.get("final_response") else "empty"
        result["stdout"] = buf_out.getvalue()[:500]
        result["stderr"] = buf_err.getvalue()[:500]

    except Exception as exc:
        # `start` is guaranteed bound; on early failure this includes setup time.
        result["elapsed_seconds"] = round(time.time() - start, 2)
        result["status"] = "error"
        result["error"] = str(exc)
        result["traceback"] = traceback.format_exc()[-1000:]

    result["finished_at"] = datetime.now().isoformat()
    return result
|
|
|
|
|
|
def main():
    """Drive the overnight loop: run every task each cycle, forever.

    Writes two artifacts into RESULTS_DIR:
      * overnight_run_<id>.jsonl  — one JSON line per task result, appended live
      * overnight_summary_<id>.md — rewritten after every completed cycle

    Runs until killed; Ctrl-C now exits cleanly instead of dumping a traceback.
    """
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = RESULTS_DIR / f"overnight_run_{run_id}.jsonl"
    summary_path = RESULTS_DIR / f"overnight_summary_{run_id}.md"

    print("=== Timmy Overnight Loop ===")  # was an f-string with no placeholders
    print(f"Run ID: {run_id}")
    print(f"Tasks: {len(TASKS)}")
    print(f"Log: {log_path}")
    print(f"Max turns per task: {MAX_TURNS_PER_TASK}")
    print()

    # Accumulates across all cycles so the "Aggregate" section reflects the
    # whole night; ~10 small dicts per cycle stays tiny over many hours.
    results = []
    cycle = 0

    try:
        # Run continuously until killed
        while True:
            cycle += 1
            print(f"--- Cycle {cycle} ({datetime.now().strftime('%H:%M:%S')}) ---")

            for task in TASKS:
                task_id = task["id"]
                print(f" [{task_id}] ", end="", flush=True)

                result = run_task(task, cycle)
                results.append(result)

                # Append to JSONL log immediately so a mid-night crash loses
                # at most the in-flight task.
                with open(log_path, "a") as f:
                    f.write(json.dumps(result) + "\n")

                status = result["status"]
                elapsed = result.get("elapsed_seconds", "?")
                print(f"{status} ({elapsed}s)")

                # Brief pause between tasks
                time.sleep(2)

            # Write summary after each cycle so the morning report is always
            # current even if the loop dies overnight.
            passes = sum(1 for r in results if r["status"] == "pass")
            errors = sum(1 for r in results if r["status"] == "error")
            empties = sum(1 for r in results if r["status"] == "empty")
            total = len(results)
            avg_time = sum(r.get("elapsed_seconds", 0) for r in results) / max(total, 1)

            summary = f"""# Timmy Overnight Loop — Summary
Run ID: {run_id}
Generated: {datetime.now().isoformat()}
Cycles completed: {cycle}
Total tasks run: {total}

## Aggregate
- Pass: {passes}/{total} ({100*passes//max(total,1)}%)
- Empty: {empties}/{total}
- Error: {errors}/{total}
- Avg response time: {avg_time:.1f}s

## Per-task results (latest cycle)
"""
            cycle_results = [r for r in results if r["run"] == cycle]
            for r in cycle_results:
                resp_preview = r.get("response", "")[:100].replace("\n", " ")
                summary += f"- **{r['task_id']}**: {r['status']} ({r.get('elapsed_seconds','?')}s) — {resp_preview}\n"

            summary += "\n## Error details\n"  # f-prefix removed: no placeholders
            for r in results:
                if r["status"] == "error":
                    summary += f"- {r['task_id']} (cycle {r['run']}): {r.get('error','?')}\n"

            with open(summary_path, "w") as f:
                f.write(summary)

            print(f"\n Cycle {cycle} done. Pass={passes} Error={errors} Empty={empties} Avg={avg_time:.1f}s")
            print(f" Summary: {summary_path}")
            print(" Sleeping 30s before next cycle...\n")
            time.sleep(30)
    except KeyboardInterrupt:
        # "Runs until killed" includes a polite Ctrl-C: all artifacts are
        # already on disk, so just report where they are and exit quietly.
        print(f"\nInterrupted after {cycle} cycle(s). Log: {log_path}")


if __name__ == "__main__":
    main()
|