timmy-home/scripts/local_timmy_proof_test.py

#!/Users/apayne/.hermes/hermes-agent/venv/bin/python3
import io
import json
import os
import sys
import time
import urllib.request
from contextlib import redirect_stdout
from pathlib import Path

# Endpoints of the llama-server instance this proof run talks to.
LLAMA_HEALTH = "http://localhost:8081/health"
LLAMA_MODELS = "http://localhost:8081/v1/models"

# Hermes locations, both rooted in ~/.hermes.
_HERMES_HOME = Path.home() / ".hermes"
HERMES_AGENT_ROOT = _HERMES_HOME / "hermes-agent"
SESSION_DIR = _HERMES_HOME / "sessions"

# The report directory is created up front. The report file name carries a
# second-resolution timestamp that is taken once, when the script starts.
REPORT_DIR = Path.home().joinpath(".timmy", "test-results")
REPORT_DIR.mkdir(parents=True, exist_ok=True)
_RUN_STAMP = time.strftime("%Y%m%d_%H%M%S")
REPORT_PATH = REPORT_DIR / f"local_timmy_proof_{_RUN_STAMP}.md"


def fetch_json(url: str) -> dict:
    """Fetch *url* and return its body parsed as JSON.

    The request is opened with a 20-second timeout and the body is decoded
    as UTF-8 before parsing. Nothing is caught here: any error raised while
    opening, reading, decoding or parsing propagates to the caller.
    """
    response = urllib.request.urlopen(url, timeout=20)
    with response:
        raw_body = response.read()
        text = raw_body.decode("utf-8")
        return json.loads(text)


def latest_session(before: set[Path]) -> Path | None:
    """Pick the session file that most likely belongs to the run just made.

    ``before`` is the set of session files that existed prior to the run.
    Files matching ``session_*.json`` in ``SESSION_DIR`` that are not in
    ``before`` are preferred; if there are none, every current session file
    is considered instead. Among the candidates the one with the newest
    modification time is returned, or ``None`` when no session file exists.
    """

    def modified_at(path: Path) -> float:
        return path.stat().st_mtime

    current = set(SESSION_DIR.glob("session_*.json"))
    created = [path for path in current if path not in before]
    candidates = created if created else list(current)
    if not candidates:
        return None
    return max(candidates, key=modified_at)


# 1. Local model server must be alive.
# Any failure while fetching or parsing the health endpoint aborts the run
# with exit status 1.
try:
    fetch_json(LLAMA_HEALTH)
except Exception as e:
    print(f"FAIL: llama-server health check failed: {e}")
    raise SystemExit(1)

# The first entry the server lists under /v1/models supplies the model id
# used for the agent run; a missing or malformed listing is fatal as well.
try:
    models = fetch_json(LLAMA_MODELS)
    advertised = models["data"]
    model_id = advertised[0]["id"]
except Exception as e:
    print(f"FAIL: could not detect local model id from /v1/models: {e}")
    raise SystemExit(1)

# 2. Run Hermes locally with a harder, grounded Timmy-style task.
# Put the agent checkout first on the import path so that ``cli`` below is
# looked up there before any other location.
sys.path.insert(0, str(HERMES_AGENT_ROOT))
# NOTE(review): the working directory is switched to the agent root before the
# import — presumably the CLI relies on paths relative to it; confirm.
os.chdir(HERMES_AGENT_ROOT)
# Imported late on purpose: it needs the sys.path change above (hence E402).
from cli import main as hermes_main  # noqa: E402

# Task prompt handed to the agent. REPORT_PATH is interpolated twice: once as
# the file the agent is told to write and once in the acknowledgement line it
# is told to echo back. Leading/trailing whitespace is stripped from the text.
prompt = f"""
You are running a local Timmy proof test.

You must use file tools. Do not answer from vibe or memory. Ground the answer in the live files.

Task:
1. Read these files:
- ~/.timmy/SOUL.md
- ~/.timmy/OPERATIONS.md
- ~/.timmy/decisions.md
- ~/.hermes/config.yaml

2. Search ~/.hermes/bin/*.sh for these remote/cloud markers:
- chatgpt.com/backend-api/codex
- generativelanguage.googleapis.com
- api.groq.com
- 143.198.27.163

3. Write a report to exactly this path:
{REPORT_PATH}

4. The report must contain exactly these sections:
# Local Timmy Proof Report
## Verdict
## Evidence from files
## Evidence from script scan
## Source distinction
## Next action

Rules:
- Every factual claim must cite the file path it came from.
- If something is your inference, label it as inference.
- Do not guess.
- The verdict must answer: Is the active harness already local-only? Why or why not?
- The Next action section must name the single highest-leverage fix.
- Include at least one quote from ~/.timmy/SOUL.md that explains why a hidden cloud fallback matters.

5. After writing the report, respond with exactly two lines:
REPORT_WRITTEN: {REPORT_PATH}
VERDICT: <one sentence>
""".strip()

# Snapshot the session files that already exist so the one created by this
# run can be told apart afterwards.
before = set(SESSION_DIR.glob("session_*.json"))

# Everything the agent prints is captured rather than shown live; it is
# replayed further down once the outcome is known.
captured = io.StringIO()
try:
    with redirect_stdout(captured):
        hermes_main(
            query=prompt,
            toolsets="file",
            model=model_id,
            provider="local-llama.cpp",
            quiet=True,
            max_turns=12,
            pass_session_id=True,
        )
except BaseException:
    # The agent raised or exited mid-run. Its captured output would otherwise
    # vanish with the buffer, so surface it on stderr (stdout stays untouched)
    # and let the original exception / exit status continue unchanged.
    partial = captured.getvalue().strip()
    if partial:
        print("--- Agent output before failure ---", file=sys.stderr)
        print(partial, file=sys.stderr)
    raise
output = captured.getvalue().strip()

# The run only counts as a pass if the agent actually produced the report.
if not REPORT_PATH.exists():
    print("FAIL: Hermes session completed but proof report was not written.")
    print("--- Agent output ---")
    print(output)
    sys.exit(2)

# Session metadata is informational only: it never changes the verdict, so
# every problem reading it degrades to 'unknown' instead of failing the run.
sess = latest_session(before)
session_meta = {}
if sess and sess.exists():
    try:
        loaded = json.loads(sess.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Unreadable, undecodable or malformed JSON: keep the empty default.
        loaded = None
    # Valid JSON that is not an object has no ``.get``; treat it as missing.
    if isinstance(loaded, dict):
        session_meta = loaded

print("LOCAL_TIMMY_PROOF: PASS")
print(f"report_path: {REPORT_PATH}")
print(f"session_file: {sess if sess else 'unknown'}")
print(f"session_provider: {session_meta.get('provider', 'unknown')}")
print(f"session_base_url: {session_meta.get('base_url', 'unknown')}")
print(f"session_model: {session_meta.get('model', 'unknown')}")
print("--- Agent final output ---")
print(output)
print("--- Report preview ---")
# Assumes the report is UTF-8 text; undecodable bytes are replaced so the
# preview cannot crash the script after PASS has already been printed.
print(REPORT_PATH.read_text(encoding="utf-8", errors="replace")[:4000])