#!/usr/bin/env python3
"""Timmy overnight tightening loop.

Runs a series of small, grounded tasks against local llama-server via Hermes.
Each task is deliberately simple: one or two tool calls max.
Logs every result with timing data.
Goal: rich data about local Timmy's tool-use capability by morning.
"""
import io
import json
import os
import sys
import time
import traceback
from contextlib import redirect_stderr, redirect_stdout
from datetime import datetime
from pathlib import Path

# ── Config ──────────────────────────────────────────────────────────
# Location of the hermes-agent checkout (provides the run_agent module).
AGENT_DIR = Path.home() / ".hermes" / "hermes-agent"
# All JSONL logs and markdown summaries land here.
RESULTS_DIR = Path.home() / ".timmy" / "overnight-loop"
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Ephemeral system prompt pushed to the agent for every task.
SYSTEM_PROMPT = """You are Timmy. You run locally on llama.cpp.
You MUST use the tools provided. Do not narrate tool calls as text.
When asked to read a file, call the read_file tool.
When asked to write a file, call the write_file tool.
When asked to search, call the search_files tool.
Be brief. Do the task. Report what you found."""

MAX_TURNS_PER_TASK = 5  # hard cap on agent iterations per task
TASK_TIMEOUT = 120  # seconds — NOTE(review): defined but not enforced anywhere visible; confirm intent
# ── Tasks ───────────────────────────────────────────────────────────
# Each task is one small, grounded exercise of a single toolset.
# "prompt" may contain the literal token {timestamp}, substituted by run_task.
TASKS = [
    {
        "id": "read-soul",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/SOUL.md. Quote the first sentence of the Prime Directive section.",
    },
    {
        "id": "read-operations",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/OPERATIONS.md. How many sections does it have? List their headings.",
    },
    {
        "id": "read-decisions",
        "toolsets": "file",
        "prompt": "Read the file ~/.timmy/decisions.md. What is the most recent decision entry? Quote its date and title.",
    },
    {
        "id": "read-config",
        "toolsets": "file",
        "prompt": "Read the file ~/.hermes/config.yaml. What model and provider are configured as default?",
    },
    {
        "id": "write-observation",
        "toolsets": "file",
        # f-string: {RESULTS_DIR} is expanded now; {{timestamp}} stays literal
        # "{timestamp}" for run_task to substitute at execution time.
        "prompt": f"Write a file to {RESULTS_DIR}/timmy_wrote_this.md with exactly this content:\n# Timmy was here\nTimestamp: {{timestamp}}\nI wrote this file using the write_file tool.\nSovereignty and service always.",
    },
    {
        "id": "search-cloud-markers",
        "toolsets": "file",
        "prompt": "Search files in ~/.hermes/bin/ for the string 'chatgpt.com'. Report which files contain it and on which lines.",
    },
    {
        "id": "search-soul-keyword",
        "toolsets": "file",
        "prompt": "Search ~/.timmy/SOUL.md for the word 'sovereignty'. How many times does it appear?",
    },
    {
        "id": "list-bin-scripts",
        "toolsets": "file",
        "prompt": "Search for files matching *.sh in ~/.hermes/bin/. List the first 10 filenames.",
    },
    {
        "id": "read-and-summarize",
        "toolsets": "file",
        "prompt": "Read ~/.timmy/SOUL.md. In exactly one sentence, what is Timmy's position on honesty?",
    },
    {
        "id": "multi-read",
        "toolsets": "file",
        "prompt": "Read both ~/.timmy/SOUL.md and ~/.hermes/config.yaml. Does the config honor the soul's requirement to not phone home? Answer yes or no with one sentence of evidence.",
    },
]
def run_task(task: dict, run_number: int) -> dict:
    """Run a single task against the local agent and return a result dict.

    Args:
        task: mapping with "id", "toolsets", and "prompt" keys; the literal
            token "{timestamp}" in the prompt is replaced with the current
            ISO timestamp before the agent sees it.
        run_number: 1-based cycle counter, recorded as result["run"].

    Returns:
        A dict with timing, status ("pass" / "empty" / "error"), the agent's
        (truncated) response, and captured stdout/stderr. Never raises:
        any exception is folded into the result with status "error".
    """
    task_id = task["id"]
    prompt = task["prompt"].replace("{timestamp}", datetime.now().isoformat())
    toolsets = task["toolsets"]

    result = {
        "task_id": task_id,
        "run": run_number,
        "started_at": datetime.now().isoformat(),
        "prompt": prompt,
        "toolsets": toolsets,
    }

    # Initialize timing up front so the except path can always compute an
    # elapsed value (the original used a fragile `'start' in dir()` check).
    start = time.time()
    try:
        # Path setup inside the try so a missing/broken agent checkout is
        # recorded as a task error instead of crashing the whole loop.
        sys.path.insert(0, str(AGENT_DIR))
        from run_agent import AIAgent

        # Explicit Ollama provider — do NOT use resolve_runtime_provider(),
        # which may return 'local' (unsupported). The overnight loop always
        # runs against local Ollama inference.
        model = os.environ.get("OVERNIGHT_MODEL", "hermes4:14b")
        base_url = os.environ.get("OVERNIGHT_BASE_URL", "http://localhost:11434/v1")
        provider = "ollama"

        buf_out = io.StringIO()
        buf_err = io.StringIO()
        agent = AIAgent(
            model=model,
            base_url=base_url,
            provider=provider,
            api_mode="chat_completions",
            max_iterations=MAX_TURNS_PER_TASK,
            quiet_mode=True,
            ephemeral_system_prompt=SYSTEM_PROMPT,
            skip_context_files=True,
            skip_memory=True,
            enabled_toolsets=[toolsets] if toolsets else None,
        )

        # Re-time so elapsed_seconds covers only the conversation itself,
        # not agent construction.
        start = time.time()
        with redirect_stdout(buf_out), redirect_stderr(buf_err):
            conv_result = agent.run_conversation(prompt, sync_honcho=False)
        elapsed = time.time() - start

        result["elapsed_seconds"] = round(elapsed, 2)
        result["response"] = conv_result.get("final_response", "")[:2000]
        result["session_id"] = getattr(agent, "session_id", None)
        result["provider"] = provider
        result["base_url"] = base_url
        result["model"] = model
        result["tool_calls_made"] = conv_result.get("tool_calls_count", 0)
        result["status"] = "pass" if conv_result.get("final_response") else "empty"
        result["stdout"] = buf_out.getvalue()[:500]
        result["stderr"] = buf_err.getvalue()[:500]
    except Exception as exc:
        result["elapsed_seconds"] = round(time.time() - start, 2)
        result["status"] = "error"
        result["error"] = str(exc)
        result["traceback"] = traceback.format_exc()[-1000:]

    result["finished_at"] = datetime.now().isoformat()
    return result
def main():
    """Run the task battery in an endless loop.

    Each cycle runs every task once, appends per-task results to a JSONL
    log (crash-safe: written immediately after each task), then rewrites a
    markdown summary of aggregate stats. Loops until killed.
    """
    run_id = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_path = RESULTS_DIR / f"overnight_run_{run_id}.jsonl"
    summary_path = RESULTS_DIR / f"overnight_summary_{run_id}.md"

    print("=== Timmy Overnight Loop ===")
    print(f"Run ID: {run_id}")
    print(f"Tasks: {len(TASKS)}")
    print(f"Log: {log_path}")
    print(f"Max turns per task: {MAX_TURNS_PER_TASK}")
    print()

    results = []
    cycle = 0
    # Run continuously until killed.
    while True:
        cycle += 1
        print(f"--- Cycle {cycle} ({datetime.now().strftime('%H:%M:%S')}) ---")
        for task in TASKS:
            task_id = task["id"]
            print(f"[{task_id}] ", end="", flush=True)
            result = run_task(task, cycle)
            results.append(result)

            # Append to the JSONL log immediately so a crash loses nothing.
            with open(log_path, "a") as f:
                f.write(json.dumps(result) + "\n")

            status = result["status"]
            elapsed = result.get("elapsed_seconds", "?")
            print(f"{status} ({elapsed}s)")

            # Brief pause between tasks.
            time.sleep(2)

        # Write summary after each cycle (aggregates span ALL cycles so far).
        passes = sum(1 for r in results if r["status"] == "pass")
        errors = sum(1 for r in results if r["status"] == "error")
        empties = sum(1 for r in results if r["status"] == "empty")
        total = len(results)
        avg_time = sum(r.get("elapsed_seconds", 0) for r in results) / max(total, 1)

        summary = f"""# Timmy Overnight Loop — Summary

Run ID: {run_id}
Generated: {datetime.now().isoformat()}
Cycles completed: {cycle}
Total tasks run: {total}

## Aggregate
- Pass: {passes}/{total} ({100 * passes // max(total, 1)}%)
- Empty: {empties}/{total}
- Error: {errors}/{total}
- Avg response time: {avg_time:.1f}s

## Per-task results (latest cycle)
"""
        cycle_results = [r for r in results if r["run"] == cycle]
        for r in cycle_results:
            resp_preview = r.get("response", "")[:100].replace("\n", " ")
            summary += f"- **{r['task_id']}**: {r['status']} ({r.get('elapsed_seconds', '?')}s) — {resp_preview}\n"

        summary += "\n## Error details\n"
        for r in results:
            if r["status"] == "error":
                summary += f"- {r['task_id']} (cycle {r['run']}): {r.get('error', '?')}\n"

        with open(summary_path, "w") as f:
            f.write(summary)

        print(f"\nCycle {cycle} done. Pass={passes} Error={errors} Empty={empties} Avg={avg_time:.1f}s")
        print(f"Summary: {summary_path}")
        print("Sleeping 30s before next cycle...\n")
        time.sleep(30)


if __name__ == "__main__":
    main()