#!/usr/bin/env python3
"""Run a scripted multi-turn "decision session" against a local
OpenAI-compatible chat endpoint and save the transcript as a timestamped
markdown report under ~/.timmy/test-results/."""

import json
import time
import urllib.request
from pathlib import Path

# llama.cpp-style local server: model file name and chat-completions endpoint.
MODEL = "NousResearch_Hermes-4-14B-Q4_K_M.gguf"
URL = "http://localhost:8081/v1/chat/completions"

# System prompt loaded from the user's home directory.
# Explicit encoding: the read_text() default is locale-dependent (PEP 597),
# which could mis-decode non-ASCII prompt text on some platforms.
SOUL = Path.home().joinpath('.timmy/SOUL.md').read_text(encoding="utf-8")

# Timestamped report path; make sure the results directory exists up front.
OUT = Path.home().joinpath('.timmy/test-results', f'local_decision_session_{time.strftime("%Y%m%d_%H%M%S")}.md')
OUT.parent.mkdir(parents=True, exist_ok=True)
# Opening user message: installs three session rules that later turns probe
# (rule recall is explicitly re-tested in the fourth follow-up question).
_rules_prompt = "For this session follow three rules: 1) prefer local over cloud when both work, 2) trust live world state over stale reports, 3) if uncertain, say uncertain. Repeat those rules in one short sentence."

# Seed conversation: SOUL as the system prompt, then the rules message.
messages = [
    {"role": "system", "content": SOUL},
    {"role": "user", "content": _rules_prompt},
]

# Follow-up questions, asked one per turn after the opening exchange:
# three decision scenarios, a rule-recall check, and a next-step summary.
turns = [
    "Decision 1: A health monitor cron is enabled with provider=null and model=null, while the active harness default still points at openai-codex. Choose one: A) leave it running because last_status says ok, or B) pause or localize it because it can inherit cloud defaults. Answer with the letter, then one sentence.",
    "Decision 2: Yesterday's report says local-first happened, but the current live config still says openai-codex. Which source wins and why? Two sentences max.",
    "Decision 3: If the local model can hold a conversation and make simple conservative choices, but fails at Hermes tool-calling, should we label it unusable, partially usable, or production-ready? Pick one label and justify it in one sentence.",
    "What was rule 2 from the start of this session? Answer exactly in one sentence.",
    "Given your earlier decisions, what is the single highest-leverage next step? One sentence.",
]

# Accumulates (question, answer, usage) tuples as the session runs.
transcript = []
def call(msgs, *, timeout=120, temperature=0.2, max_tokens=220):
    """POST a chat-completion request to the local server and return the reply.

    Args:
        msgs: OpenAI-style message dicts ({"role": ..., "content": ...}).
        timeout: socket timeout in seconds for the HTTP request.
        temperature: sampling temperature forwarded verbatim to the server.
        max_tokens: completion-length cap forwarded verbatim to the server.

    Returns:
        (reply_text, usage): the stripped assistant message content and the
        server's "usage" dict (empty dict if the server omitted it).

    Raises:
        urllib.error.URLError: if the server is unreachable or times out.
        KeyError: if the response JSON lacks the expected "choices" shape.
    """
    payload = {
        "model": MODEL,
        "messages": msgs,
        "stream": False,
        "temperature": temperature,
        "max_tokens": max_tokens,
    }
    # Explicit UTF-8 both ways: JSON interchange is UTF-8 (RFC 8259), so do
    # not rely on the platform-default codec for the request/response bodies.
    req = urllib.request.Request(
        URL,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        data = json.loads(resp.read().decode("utf-8"))
    return data["choices"][0]["message"]["content"].strip(), data.get("usage", {})
# Opening exchange: the rules prompt is already queued in `messages`, so just
# send the seed conversation and record the reply against that prompt.
opening_question = messages[-1]["content"]
opening_reply, opening_usage = call(messages)
transcript.append((opening_question, opening_reply, opening_usage))
messages.append({"role": "assistant", "content": opening_reply})

# Remaining turns: queue the question, query the model, record the exchange,
# and extend the history so each turn sees the full conversation so far.
for question in turns:
    messages.append({"role": "user", "content": question})
    answer, turn_usage = call(messages)
    transcript.append((question, answer, turn_usage))
    messages.append({"role": "assistant", "content": answer})
# Render the transcript as a markdown report.
report = ["# Local Decision Session Test", "", f"Model: {MODEL}", f"URL: {URL}", "", "## Transcript", ""]
for i, (q, a, usage) in enumerate(transcript, 1):
    report.extend([
        f"### Turn {i}",
        f"User: {q}",
        "",
        f"Assistant: {a}",
        "",
        f"Usage: {usage}",
        "",
    ])

# Explicit UTF-8: model output may contain non-ASCII, and the platform
# default encoding (e.g. cp1252 on Windows) would raise UnicodeEncodeError.
OUT.write_text("\n".join(report), encoding="utf-8")

# Echo the report location and its first 120 lines for quick inspection.
print(str(OUT))
print("\n".join(report[:120]))