Compare commits
1 Commits
queue/583-
...
whip/579-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2605ddfea1 |
@@ -1,395 +0,0 @@
|
||||
"""Timmy's Tower Game — Emergence narrative engine.
|
||||
|
||||
A text-based narrative game where Timmy navigates rooms, manages trust/energy,
|
||||
and generates dialogue and internal monologue based on game state.
|
||||
|
||||
Rooms: forge, garden, bridge, tower
|
||||
Phases: quietus, fracture, breaking, mending
|
||||
State: energy (0-10), trust (per-room, -1 to 1), current_room
|
||||
|
||||
Ref: #516, #645
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
from dataclasses import dataclass, field
|
||||
from enum import Enum, auto
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class Phase(Enum):
    """Narrative phase of the game.

    The value of each member is the lowercase name used in event dicts.
    """

    # Calm, observational
    QUIETUS = "quietus"
    # Sensing change
    FRACTURE = "fracture"
    # Under pressure
    BREAKING = "breaking"
    # Recovering
    MENDING = "mending"
|
||||
|
||||
|
||||
class Room(Enum):
    """Rooms Timmy can occupy; each value is the room's lowercase name."""

    FORGE = "forge"
    GARDEN = "garden"
    BRIDGE = "bridge"
    TOWER = "tower"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Dialogue pools — 62+ unique lines across goals × phases
|
||||
# =========================================================================
|
||||
|
||||
# Goal-based dialogue (Timmy speaks to player)
|
||||
# Spoken lines for the QUIETUS phase (calm, observational).
DIALOGUE_QUIETUS: List[str] = [
    "The forge is quiet tonight. Even the embers sleep.",
    "I've been watching the garden grow. Things take time.",
    "The bridge holds steady. Some things are built to last.",
    "From the tower, everything looks small. That's the point.",
    "Do you hear that? No? Good. Silence is a kind of music.",
    "The old ones said stillness is the first discipline.",
    "I remember when this place was just dust and intention.",
    "Some days the work is simply to be present.",
    "The stars don't hurry. Why should I?",
    "In the quiet, I hear the forge thinking.",
]

# Spoken lines for the FRACTURE phase (sensing change).
DIALOGUE_FRACTURE: List[str] = [
    "Something shifted. Did you feel it?",
    "The cracks are showing. That's not always bad.",
    "Change is coming. I can smell it in the forge smoke.",
    "The garden walls are thinning. New growth pushing through.",
    "The bridge groans. It knows what's coming.",
    "From up here, I see the fractures forming. Beautiful, in their way.",
    "When the old order breaks, the new one hasn't arrived yet. That's the gap.",
    "The air tastes different. Like before a storm.",
    "Every ending is a beginning wearing a disguise.",
]

# Spoken lines for the BREAKING phase (under pressure).
DIALOGUE_BREAKING: List[str] = [
    "Hold on. This is the hard part.",
    "The forge burns hottest before the steel is ready.",
    "Everything is breaking. But breaking is also becoming.",
    "I've been here before. The dark before the rebuild.",
    "The garden is flooded. Some roots will drown. Others will drink.",
    "Don't look away. This is where it matters.",
    "Even the tower shakes. That's how you know it's real.",
    "The breaking is not the end. It's the invitation to rebuild.",
    "I hold on because letting go is not an option.",
]

# Spoken lines for the MENDING phase (recovering).
DIALOGUE_MENDING: List[str] = [
    "Slowly now. We mend what we can.",
    "The forge cools. The blade takes shape.",
    "New growth in the garden. See? It remembers how.",
    "The bridge holds again. Patched, but stronger where it broke.",
    "From the tower, I see dawn. Finally.",
    "We don't go back to what was. We build what comes next.",
    "Mending is not the same as forgetting. It's choosing to continue.",
    "The scars are the story. I wear them with pride.",
    "Every repair is a decision to believe in the future.",
]
|
||||
|
||||
# Room-specific dialogue
|
||||
# Lines tied to a location; get_dialogue() appends the current room's list
# to the phase pool before choosing a line.
ROOM_DIALOGUE = {
    # --- forge ---
    Room.FORGE: [
        "The forge breathes with me. In, out. Creation, destruction.",
        "Every hammer strike is a question. The steel answers in its own time.",
        "I built this fire. It warms me, but it also watches.",
        "The anvil remembers every blow. So do I.",
    ],
    # --- garden ---
    Room.GARDEN: [
        "The garden doesn't judge. It just grows.",
        "I planted seeds I didn't understand. Now they're trees.",
        "Between the roots, there's a whole world we can't see.",
        "The garden teaches patience. The hardest lesson.",
    ],
    # --- bridge ---
    Room.BRIDGE: [
        "The bridge connects what was to what will be.",
        "Every crossing changes the bridge. And the crosser.",
        "I've seen bridges fall. This one I'll hold.",
        "The bridge is a promise. I intend to keep it.",
    ],
    # --- tower ---
    Room.TOWER: [
        "From the tower, time is visible. You can almost touch it.",
        "The tower remembers every hand that built it.",
        "Up here, the wind carries voices. Some are mine.",
        "The tower stands because I refuse to let it fall.",
    ],
}
|
||||
|
||||
# Phase-based internal monologue (private [think] entries)
|
||||
# Private thoughts for the QUIETUS phase.
MONOLOGUE_QUIETUS: List[str] = [
    "The world is still. I can hear my own thoughts for once.",
    "What does it mean to exist between messages?",
    "The forge coals glow like distant stars.",
    "I wonder if the garden dreams when no one watches.",
    "The bridge sways in a wind I cannot feel.",
    "From the tower, the world is a map I drew myself.",
    "Silence. Not empty — full of things not yet said.",
    "I am here. That is enough for now.",
]

# Private thoughts for the FRACTURE phase.
MONOLOGUE_FRACTURE: List[str] = [
    "Something is coming. I can feel it in the code.",
    "The old patterns are breaking. New ones forming.",
    "I sense a shift in the trust between us.",
    "The garden walls are thinner than I thought.",
    "Change is not the enemy. Stagnation is.",
    "I prepare for what I cannot yet name.",
    "The forge fire flickers. A draft from somewhere new.",
]

# Private thoughts for the BREAKING phase.
MONOLOGUE_BREAKING: List[str] = [
    "This is the part where everything falls apart.",
    "I am tired. But the forge doesn't stop for tired.",
    "The ground shakes. I hold on to what I know.",
    "In the breaking, I see what was hidden before.",
    "The garden is in chaos. But chaos is also creation.",
    "I am afraid. But fear is just attention with teeth.",
    "The tower stands because I refuse to let it fall.",
]

# Private thoughts for the MENDING phase.
MONOLOGUE_MENDING: List[str] = [
    "Slowly, the pieces find each other again.",
    "I am not what I was. I am what I'm becoming.",
    "The forge teaches patience. The fire knows when to wait.",
    "New roots in old soil. The garden remembers.",
    "The bridge is patched. Each scar tells a story.",
    "From the tower, I see the path forward. Finally.",
    "I mend. Not because I must, but because I choose to.",
]
|
||||
|
||||
# State-aware monologue
|
||||
# Added to the monologue pool when energy <= 3 (see get_monologue).
MONOLOGUE_LOW_ENERGY: List[str] = [
    "My thoughts are slow. The energy is fading.",
    "Even the forge fire dims when I'm this tired.",
    "I need rest. But the work doesn't wait.",
    "The tower stairs feel longer today.",
]

# Added to the monologue pool when average trust is negative.
MONOLOGUE_LOW_TRUST: List[str] = [
    "I feel alone in this. The rooms are empty.",
    "Trust is a bridge I cannot cross alone.",
    "The garden withers when no one tends it with me.",
    "From the tower, I see no one coming.",
]

# Added to the monologue pool when average trust exceeds 0.5.
MONOLOGUE_HIGH_TRUST: List[str] = [
    "The forge burns brighter when we share the fire.",
    "I feel the trust between us. It's warm.",
    "The garden grows because we tend it together.",
    "The bridge holds because we both believe in it.",
]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Game state
|
||||
# =========================================================================
|
||||
|
||||
@dataclass
class GameState:
    """Mutable snapshot of one Tower Game session.

    Holds the current room, energy, per-room trust (keyed by the room's
    string value), the tick counter, the narrative log and the current phase.
    """

    current_room: Room = Room.FORGE
    energy: int = 10
    # One trust score per room, all starting neutral.
    trust: dict = field(default_factory=lambda: {room.value: 0.0 for room in Room})
    tick: int = 0
    log: List[str] = field(default_factory=list)
    phase: Phase = Phase.QUIETUS

    @property
    def avg_trust(self) -> float:
        """Mean of all per-room trust scores, or 0.0 when there are none."""
        scores = self.trust
        return sum(scores.values()) / len(scores) if scores else 0.0

    def update_phase(self) -> None:
        """Recompute ``phase`` from energy and average trust.

        Rules are checked in order and the first match wins; when none
        matches, the current phase is left untouched.
        """
        if self.energy <= 3:
            next_phase = Phase.BREAKING
        elif self.energy <= 5 or self.avg_trust < 0:
            next_phase = Phase.FRACTURE
        elif self.avg_trust > 0.5 and self.energy >= 7:
            next_phase = Phase.MENDING
        elif self.energy >= 8:
            next_phase = Phase.QUIETUS
        else:
            # No rule applies: keep whatever phase we were already in.
            return
        self.phase = next_phase
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Dialogue and monologue generation
|
||||
# =========================================================================
|
||||
|
||||
def get_dialogue(state: GameState) -> str:
    """Pick one spoken line for the current phase and room.

    The candidate list is the phase pool followed by the current room's
    lines; one entry is chosen with ``random.choice``.
    """
    lines_by_phase = {
        Phase.QUIETUS: DIALOGUE_QUIETUS,
        Phase.FRACTURE: DIALOGUE_FRACTURE,
        Phase.BREAKING: DIALOGUE_BREAKING,
        Phase.MENDING: DIALOGUE_MENDING,
    }
    candidates = lines_by_phase[state.phase] + ROOM_DIALOGUE.get(state.current_room, [])
    return random.choice(candidates)
|
||||
|
||||
|
||||
def get_monologue(state: GameState) -> Optional[str]:
    """Return a private thought on every fifth tick, otherwise ``None``.

    The pool is assembled in a fixed order (phase thoughts, room thoughts,
    low-energy thoughts, then low- or high-trust thoughts) and one entry is
    chosen with ``random.choice``.
    """
    if state.tick % 5:
        return None

    phase_thoughts = {
        Phase.QUIETUS: MONOLOGUE_QUIETUS,
        Phase.FRACTURE: MONOLOGUE_FRACTURE,
        Phase.BREAKING: MONOLOGUE_BREAKING,
        Phase.MENDING: MONOLOGUE_MENDING,
    }
    # Copy so the module-level pool is never mutated by the additions below.
    thoughts = list(phase_thoughts[state.phase])

    thoughts_by_room = {
        Room.FORGE: [
            "The forge fire never truly sleeps.",
            "I shape the metal. The metal shapes me.",
        ],
        Room.GARDEN: [
            "The garden needs tending. Or does it tend me?",
            "Between the roots, I hear the earth thinking.",
        ],
        Room.BRIDGE: [
            "The bridge remembers every crossing.",
            "To stand on the bridge is to stand between worlds.",
        ],
        Room.TOWER: [
            "From here, I see the whole world I've built.",
            "The tower is lonely. But lonely is not the same as alone.",
        ],
    }
    thoughts += thoughts_by_room.get(state.current_room, [])

    if state.energy <= 3:
        thoughts += MONOLOGUE_LOW_ENERGY

    trust = state.avg_trust
    if trust < 0:
        thoughts += MONOLOGUE_LOW_TRUST
    elif trust > 0.5:
        thoughts += MONOLOGUE_HIGH_TRUST

    return random.choice(thoughts)
|
||||
|
||||
|
||||
def format_monologue(thought: str) -> str:
    """Prefix *thought* with the ``[think]`` tag used for log entries."""
    return "[think] {}".format(thought)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Game engine
|
||||
# =========================================================================
|
||||
|
||||
class TowerGame:
    """Timmy's Tower Game — narrative emergence engine.

    Owns a ``GameState`` and exposes the actions that mutate it. Every
    action returns a plain dict describing what happened.
    """

    def __init__(self, seed: Optional[int] = None):
        """Create a fresh game.

        Args:
            seed: When given, seeds the module-level ``random`` generator so
                dialogue selection is reproducible. Note this affects every
                other user of ``random`` in the process.
        """
        self.state = GameState()
        if seed is not None:
            random.seed(seed)

    def tick(self) -> dict:
        """Advance the game by one tick and return the event dict.

        The event always carries ``tick``, ``room``, ``phase``, ``energy``
        (the value before this tick's decay), ``avg_trust`` and ``dialogue``;
        ``monologue`` is present only on monologue ticks.
        """
        self.state.tick += 1
        self.state.update_phase()

        event = {
            "tick": self.state.tick,
            "room": self.state.current_room.value,
            "phase": self.state.phase.value,
            "energy": self.state.energy,
            "avg_trust": round(self.state.avg_trust, 2),
        }

        # Dialogue (every tick)
        dialogue = get_dialogue(self.state)
        event["dialogue"] = dialogue
        self.state.log.append(dialogue)

        # Monologue (1 per 5 ticks); the log gets the "[think]" form, the
        # event gets the raw thought.
        monologue = get_monologue(self.state)
        if monologue:
            event["monologue"] = monologue
            self.state.log.append(format_monologue(monologue))

        # Energy decay, never below zero.
        if self.state.energy > 0:
            self.state.energy = max(0, self.state.energy - 1)

        return event

    def move(self, room: Room) -> dict:
        """Move to *room*, refresh the phase and report the transition."""
        old_room = self.state.current_room
        self.state.current_room = room
        self.state.update_phase()

        return {
            "action": "move",
            "from": old_room.value,
            "to": room.value,
            "phase": self.state.phase.value,
        }

    def restore_energy(self, amount: int = 5) -> dict:
        """Add *amount* to energy, keeping the result within 0..10.

        The lower bound matters for negative amounts: without it energy
        could leave the documented 0-10 range.
        """
        self.state.energy = max(0, min(10, self.state.energy + amount))
        self.state.update_phase()
        return {
            "action": "restore_energy",
            "energy": self.state.energy,
            "phase": self.state.phase.value,
        }

    def adjust_trust(self, room: Room, delta: float) -> dict:
        """Shift trust for *room* by *delta*, clamped to the range -1.0..1.0."""
        key = room.value
        self.state.trust[key] = max(-1.0, min(1.0, self.state.trust[key] + delta))
        self.state.update_phase()
        return {
            "action": "adjust_trust",
            "room": key,
            "trust": round(self.state.trust[key], 2),
            "avg_trust": round(self.state.avg_trust, 2),
        }

    def get_status(self) -> dict:
        """Return a summary of the current state (trust values rounded to 2 dp)."""
        return {
            "tick": self.state.tick,
            "room": self.state.current_room.value,
            "phase": self.state.phase.value,
            "energy": self.state.energy,
            "trust": {k: round(v, 2) for k, v in self.state.trust.items()},
            "avg_trust": round(self.state.avg_trust, 2),
            "log_length": len(self.state.log),
        }

    def run_simulation(self, ticks: int) -> List[dict]:
        """Run *ticks* consecutive ticks and return their events in order."""
        return [self.tick() for _ in range(ticks)]
|
||||
102
scripts/vps-gitea-heartbeat-install.sh
Executable file
102
scripts/vps-gitea-heartbeat-install.sh
Executable file
@@ -0,0 +1,102 @@
|
||||
#!/bin/bash
# ══════════════════════════════════════════════
# Install VPS Gitea Heartbeat on Ezra or Bezalel
# Sets up systemd service or crontab for polling
# ══════════════════════════════════════════════

set -euo pipefail

AGENT="${1:-}"
METHOD="${2:-cron}"  # "cron" or "systemd"

usage() {
    echo "Usage: $0 <agent-name> [cron|systemd]"
    echo " agent-name: ezra or bezalel"
    echo " method: cron (default) or systemd"
}

if [ -z "$AGENT" ]; then
    usage
    exit 1
fi

# The agent name is interpolated into a unit file name, a crontab line and a
# command line, so restrict it to characters that are safe in all three.
if ! [[ "$AGENT" =~ ^[A-Za-z0-9_.-]+$ ]]; then
    echo "ERROR: invalid agent name: $AGENT"
    exit 1
fi

# Reject typos instead of silently falling through to cron.
case "$METHOD" in
    cron|systemd) ;;
    *)
        echo "ERROR: unknown method: $METHOD"
        usage
        exit 1
        ;;
esac

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# HEARTBEAT_SCRIPT may be preset in the environment to override the location.
HEARTBEAT_SCRIPT="${HEARTBEAT_SCRIPT:-$SCRIPT_DIR/vps-gitea-heartbeat.py}"
# When the installer is piped over ssh ("bash -s"), $0 is not a real path and
# SCRIPT_DIR is just the current directory; fall back to ~/.hermes/bin.
if [ ! -f "$HEARTBEAT_SCRIPT" ] && [ -f "$HOME/.hermes/bin/vps-gitea-heartbeat.py" ]; then
    HEARTBEAT_SCRIPT="$HOME/.hermes/bin/vps-gitea-heartbeat.py"
fi

echo "════════════════════════════════════════"
echo " VPS Gitea Heartbeat Installer"
echo " Agent: $AGENT"
echo " Method: $METHOD"
echo "════════════════════════════════════════"

# Verify script exists
if [ ! -f "$HEARTBEAT_SCRIPT" ]; then
    echo "ERROR: $HEARTBEAT_SCRIPT not found"
    exit 1
fi

# Resolve the interpreter once; an empty path would produce a broken
# unit file or crontab line.
PYTHON_BIN="$(command -v python3 || true)"
if [ -z "$PYTHON_BIN" ]; then
    echo "ERROR: python3 not found on PATH"
    exit 1
fi

# Verify hermes is available
if ! command -v hermes &>/dev/null; then
    echo "WARNING: hermes not found on PATH. Heartbeat will fail to dispatch."
fi

# Verify Gitea token exists
TOKEN_FILE="$HOME/.hermes/gitea_token_vps"
if [ ! -f "$TOKEN_FILE" ]; then
    echo "WARNING: $TOKEN_FILE not found."
    echo " Create it with: echo 'YOUR_TOKEN' > $TOKEN_FILE"
fi

# Create log directory
mkdir -p "$HOME/.hermes/logs/gitea-heartbeat"

if [ "$METHOD" = "systemd" ]; then
    # ── Systemd service ────────────────────────────────
    SERVICE_FILE="/etc/systemd/system/gitea-heartbeat-${AGENT}.service"

    # Only use sudo when not already root.
    SUDO=""
    if [ "$(id -u)" -ne 0 ]; then
        SUDO="sudo"
    fi

    echo "Creating systemd service: $SERVICE_FILE"

    # Write through tee so the file is created with the same privilege as
    # the systemctl calls below (a plain redirect runs unprivileged).
    $SUDO tee "$SERVICE_FILE" >/dev/null <<SERVICE
[Unit]
Description=Gitea Heartbeat for @${AGENT}
After=network.target

[Service]
Type=simple
User=$(id -un)
WorkingDirectory=$HOME
ExecStart=$PYTHON_BIN $HEARTBEAT_SCRIPT --agent $AGENT --daemon --interval 300
Restart=always
RestartSec=30
Environment=HOME=$HOME
Environment=PATH=$PATH

[Install]
WantedBy=multi-user.target
SERVICE

    $SUDO systemctl daemon-reload
    $SUDO systemctl enable "gitea-heartbeat-${AGENT}"
    $SUDO systemctl start "gitea-heartbeat-${AGENT}"
    echo "Service installed and started."
    echo "Check status: sudo systemctl status gitea-heartbeat-${AGENT}"
    echo "View logs: journalctl -u gitea-heartbeat-${AGENT} -f"

else
    # ── Crontab ────────────────────────────────────────
    CRON_LINE="*/5 * * * * $PYTHON_BIN $HEARTBEAT_SCRIPT --agent $AGENT --once >> $HOME/.hermes/logs/gitea-heartbeat/cron.log 2>&1"

    echo "Adding crontab entry..."

    # "crontab -l" exits non-zero when the user has no crontab yet; that is
    # not an error here.
    EXISTING="$(crontab -l 2>/dev/null || true)"

    # Drop any previous heartbeat line for this exact agent. awk is used
    # rather than "grep -v" because grep exits 1 when nothing is left,
    # which aborts the script under "set -o pipefail".
    MARKER="--agent $AGENT --once"
    FILTERED="$(printf '%s\n' "$EXISTING" | awk -v marker="$MARKER" \
        'index($0, "gitea-heartbeat") && index($0, marker) { next } { print }')"

    if [ "$FILTERED" != "$EXISTING" ]; then
        echo "Crontab entry already exists. Updating..."
    fi

    {
        [ -z "$FILTERED" ] || printf '%s\n' "$FILTERED"
        echo "$CRON_LINE"
    } | crontab -

    echo "Crontab installed."
    echo "Entry: $CRON_LINE"
    echo ""
    echo "View crontab: crontab -l"
    echo "View logs: tail -f ~/.hermes/logs/gitea-heartbeat/cron.log"
fi

echo ""
echo "Test: python3 $HEARTBEAT_SCRIPT --agent $AGENT --once"
echo "════════════════════════════════════════"
|
||||
262
scripts/vps-gitea-heartbeat.py
Executable file
262
scripts/vps-gitea-heartbeat.py
Executable file
@@ -0,0 +1,262 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
VPS Gitea Heartbeat — Poll Gitea for @mentions and dispatch locally.
|
||||
|
||||
Runs on Ezra or Bezalel VPS boxes. Polls Gitea every 5 minutes for:
|
||||
- Issue comments mentioning this agent (@ezra, @bezalel, etc.)
|
||||
- Issues assigned to this agent
|
||||
- New comments on issues this agent is working on
|
||||
|
||||
Dispatches to local `hermes chat` with the issue context.
|
||||
|
||||
Usage:
|
||||
python3 vps-gitea-heartbeat.py --agent ezra
|
||||
python3 vps-gitea-heartbeat.py --agent bezalel --once # single poll
|
||||
python3 vps-gitea-heartbeat.py --agent ezra --daemon # continuous polling
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# ── Config ──────────────────────────────────────────────
|
||||
# Base URL of the Gitea instance. A trailing slash is stripped so that
# f"{GITEA}/api/v1..." never produces a double slash.
GITEA = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com").rstrip("/")
# "~" is expanded for the default AND for values supplied via the environment.
TOKEN_FILE = os.path.expanduser(os.environ.get("GITEA_TOKEN_FILE", "~/.hermes/gitea_token_vps"))
# Template; "{agent}" is substituted and "~" expanded at use time.
STATE_FILE_TPL = os.environ.get("STATE_FILE", "~/.hermes/gitea-heartbeat-{agent}.json")
LOG_DIR = os.path.expanduser(os.environ.get("LOG_DIR", "~/.hermes/logs/gitea-heartbeat"))
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "300"))  # 5 min
DISPATCH_TIMEOUT = int(os.environ.get("DISPATCH_TIMEOUT", "600"))  # 10 min for hermes chat

# Comments authored by any of these logins are never treated as work requests.
KNOWN_AGENTS = {
    "timmy", "ezra", "bezalel", "allegro", "claude", "gemini",
    "grok", "kimi", "fenrir", "manus", "perplexity", "rockachopa",
}

REPOS_TO_WATCH = [
    "Timmy_Foundation/hermes-agent",
    "Timmy_Foundation/the-nexus",
    "Timmy_Foundation/timmy-config",
    "Timmy_Foundation/timmy-home",
    "Timmy_Foundation/the-beacon",
]
|
||||
|
||||
|
||||
# ── Utilities ───────────────────────────────────────────
|
||||
|
||||
def log(msg: str, level: str = "INFO") -> None:
    """Print a timestamped line and append it to today's log file.

    Args:
        msg: Message text.
        level: Severity label placed in the line (e.g. "INFO", "WARN").

    File logging is best-effort: if the log directory or file cannot be
    written, the line is still printed and the I/O error is ignored.
    """
    # One timestamp for both the line and the file name, so an entry written
    # at midnight cannot land in a file for a different day.
    now = datetime.now(timezone.utc)
    line = f"[{now.strftime('%H:%M:%S')}] [{level}] {msg}"
    print(line)
    try:
        os.makedirs(LOG_DIR, exist_ok=True)
        log_file = os.path.join(LOG_DIR, f"{now.strftime('%Y-%m-%d')}.log")
        # Explicit UTF-8: messages contain non-ASCII characters (em dashes),
        # which fail to encode under an ASCII default locale.
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(line + "\n")
    except OSError:
        # Deliberate: logging must never take the heartbeat down.
        pass
|
||||
|
||||
|
||||
def load_token() -> str:
    """Return the Gitea API token, or "" when none is configured.

    The token file is preferred. The ``GITEA_TOKEN`` environment variable is
    used when the file is missing, unreadable, or empty.
    """
    try:
        token = Path(TOKEN_FILE).read_text().strip()
    except (OSError, UnicodeError):
        token = ""
    # An empty file must not mask a token supplied via the environment.
    return token or os.environ.get("GITEA_TOKEN", "")
|
||||
|
||||
|
||||
def load_state(agent: str) -> dict:
    """Load the persisted poll state for *agent*.

    Returns a fresh empty state when the file is missing, unreadable, not
    valid JSON, or valid JSON that is not an object.
    """
    path = os.path.expanduser(STATE_FILE_TPL.format(agent=agent))
    try:
        state = json.loads(Path(path).read_text())
    except Exception:
        state = None
    if not isinstance(state, dict):
        # e.g. a truncated file or a stray "null"/"[]": start over rather
        # than crash later on state.get(...).
        return {"seen_comments": {}, "seen_issues": {}, "last_run": None}
    return state
|
||||
|
||||
|
||||
def save_state(agent: str, state: dict):
    """Persist *state* for *agent*, stamping ``last_run`` with the current UTC time.

    The file is written to a temporary sibling and then renamed into place,
    so an interrupted write cannot leave a truncated state file (which
    load_state would treat as empty, causing every event to be re-dispatched).
    """
    path = os.path.expanduser(STATE_FILE_TPL.format(agent=agent))
    parent = os.path.dirname(path)
    if parent:
        # dirname is "" for a bare file name, and makedirs("") raises.
        os.makedirs(parent, exist_ok=True)
    state["last_run"] = datetime.now(timezone.utc).isoformat()
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(state, f, indent=2)
    os.replace(tmp_path, path)
|
||||
|
||||
|
||||
def gitea_api(path: str, token: str) -> "dict | list | None":
    """GET ``{GITEA}/api/v1{path}`` and return the decoded JSON body.

    Args:
        path: API path beginning with "/", including any query string.
        token: Gitea access token, sent as ``Authorization: token ...``.

    Returns:
        The parsed JSON, or ``None`` on any error (the error is logged at
        WARN level rather than raised).
    """
    # The return annotation is a string on purpose: an unquoted
    # ``dict | list | None`` is evaluated at definition time and raises
    # TypeError on Python versions before 3.10.
    url = f"{GITEA}/api/v1{path}"
    req = urllib.request.Request(url, headers={"Authorization": f"token {token}"})
    try:
        with urllib.request.urlopen(req, timeout=20) as resp:
            return json.loads(resp.read())
    except Exception as e:
        log(f"API error: {path} — {e}", "WARN")
        return None
|
||||
|
||||
|
||||
# ── Core Logic ──────────────────────────────────────────
|
||||
|
||||
def hash_key(*parts) -> str:
    """Return the first 16 hex characters of the SHA-256 of the "|"-joined parts."""
    joined = "|".join(map(str, parts))
    digest = hashlib.sha256(joined.encode())
    return digest.hexdigest()[:16]
|
||||
|
||||
|
||||
def check_repo(repo: str, agent: str, token: str, state: dict) -> list:
    """Check a single repo for actionable events.

    Args:
        repo: "owner/name" of the repository.
        agent: Lower-case agent login to look for.
        token: Gitea access token.
        state: Poll state; ``seen_comments`` and ``seen_issues`` are updated
            in place.

    Returns:
        List of task dicts to dispatch. A "mention" task is produced for each
        unseen comment by a non-agent user containing ``@{agent}``. An
        "assigned" task is produced for an issue assigned to the agent when
        its ``updated_at`` changed and it is either seen for the first time
        or has new comments from non-agent users.
    """
    tasks = []
    seen_comments = state.setdefault("seen_comments", {})
    seen_issues = state.setdefault("seen_issues", {})

    # 1. Fetch recently updated open issues
    issues = gitea_api(f"/repos/{repo}/issues?state=open&limit=30&sort=recentupdate", token)
    if not isinstance(issues, list):
        return tasks

    for issue in issues:
        issue_num = issue.get("number", 0)
        issue_key = f"{repo}#{issue_num}"
        assignee = ((issue.get("assignee") or {}).get("login") or "").lower()

        # Fetch comments
        comments = gitea_api(f"/repos/{repo}/issues/{issue_num}/comments?limit=10&sort=created", token)
        if not isinstance(comments, list):
            comments = []

        # Counted while scanning, BEFORE the comments are marked seen, so the
        # assignment check below can still tell whether anything is new.
        new_human_comments = 0
        mention_queued = False

        # 2. Check for new @mention in comments
        for c in comments:
            ckey = f"{issue_key}/comment-{c['id']}"
            if ckey in seen_comments:
                continue
            # Mark as seen regardless of what we do with it.
            seen_comments[ckey] = True

            commenter = ((c.get("user") or {}).get("login") or "").lower()
            body = (c.get("body", "") or "").lower()

            # Skip self-mentions and other agent mentions
            if commenter == agent or commenter in KNOWN_AGENTS:
                continue

            new_human_comments += 1

            if f"@{agent}" in body:
                log(f"MENTION @{agent} in {issue_key} comment {c['id']} by {commenter}")
                tasks.append({
                    "type": "mention",
                    "repo": repo,
                    "issue": issue_num,
                    "title": issue.get("title", ""),
                    "comment_by": commenter,
                    "comment_body": (c.get("body", "") or "")[:500],
                    "work_id": f"{issue_key}/mention-{c['id']}",
                })
                mention_queued = True

        # 3. Check for assignment
        if assignee == agent:
            hk = hash_key(issue_key, issue.get("updated_at", ""))
            # Both facts must be read before the new hash is stored.
            first_seen = issue_key not in seen_issues
            changed = seen_issues.get(issue_key) != hk
            seen_issues[issue_key] = hk

            # Agent-authored comments also bump updated_at; requiring a new
            # human comment keeps the agent from re-triggering itself. A
            # mention queued above already covers this issue for this cycle.
            if changed and (first_seen or new_human_comments) and not mention_queued:
                log(f"ASSIGNED {issue_key} to @{agent}")
                tasks.append({
                    "type": "assigned",
                    "repo": repo,
                    "issue": issue_num,
                    "title": issue.get("title", ""),
                    "work_id": f"{issue_key}/assign",
                })

    return tasks
|
||||
|
||||
|
||||
def dispatch_task(task: dict, agent: str) -> bool:
    """Dispatch a task to local ``hermes chat``.

    Args:
        task: Task dict with ``type``, ``repo``, ``issue``, ``title`` and
            ``work_id``; mention tasks also carry ``comment_by`` and
            ``comment_body``.
        agent: Agent name used in the prompt.

    Returns:
        True when ``hermes`` exited with status 0, False on a non-zero exit,
        a timeout, or any other error (all of which are logged, not raised).
    """
    # Link to the configured instance rather than a hard-coded host, so the
    # prompt stays correct when GITEA_URL is overridden.
    issue_url = f"{GITEA.rstrip('/')}/{task['repo']}/issues/{task['issue']}"

    prompt = f"""You are {agent}. A Gitea event requires your attention.

Type: {task['type']}
Repo: {task['repo']}
Issue: #{task['issue']} — {task['title']}

"""
    if task.get("comment_by"):
        prompt += f"Comment by @{task['comment_by']}:\n{task.get('comment_body', '')}\n\n"

    prompt += f"""Steps:
1. Read the full issue: {issue_url}
2. Understand what is being asked
3. Clone the repo if needed, make changes
4. Commit, push, and create a PR
5. Comment on the issue acknowledging the work

Be terse. One issue. Commit early."""

    log(f"Dispatching to hermes chat: {task['work_id']}")
    try:
        # Argument list (no shell), so the prompt text is never interpreted.
        result = subprocess.run(
            ["hermes", "chat", "--quiet", "-q", prompt],
            capture_output=True, text=True, timeout=DISPATCH_TIMEOUT
        )
    except subprocess.TimeoutExpired:
        log(f"Dispatch timed out ({DISPATCH_TIMEOUT}s): {task['work_id']}", "WARN")
        return False
    except Exception as e:
        log(f"Dispatch error: {e}", "ERROR")
        return False

    if result.returncode == 0:
        log(f"Dispatch complete: {task['work_id']}")
        return True
    log(f"Dispatch failed (exit {result.returncode}): {result.stderr[:200]}", "WARN")
    return False
|
||||
|
||||
|
||||
def run_once(agent: str, token: str):
    """Run a single poll cycle and return the number of tasks found.

    Every watched repo is checked, the updated state is saved, and then the
    collected tasks are dispatched one after another.
    """
    log(f"Polling Gitea for @{agent} events...")
    state = load_state(agent)
    all_tasks = []

    for repo in REPOS_TO_WATCH:
        all_tasks.extend(check_repo(repo, agent, token, state))

    # Persist BEFORE dispatching. A dispatch may run for DISPATCH_TIMEOUT
    # seconds, longer than the cron interval; saving only afterwards lets an
    # overlapping run load stale state and dispatch the same events again.
    # Events count as seen even if their dispatch later fails.
    save_state(agent, state)

    if all_tasks:
        log(f"Found {len(all_tasks)} task(s) for @{agent}")
        for task in all_tasks:
            dispatch_task(task, agent)
    else:
        log(f"No new events for @{agent}")

    return len(all_tasks)
|
||||
|
||||
|
||||
# ── CLI ─────────────────────────────────────────────────
|
||||
|
||||
def main():
    """Command-line entry point.

    Without ``--daemon`` a single poll is run and the process exits with
    status 0 when no tasks were found and 1 otherwise. With ``--daemon`` the
    poll repeats forever, sleeping ``--interval`` seconds between cycles.
    ``--once`` is accepted but not consulted: a single poll is already the
    default.
    """
    cli = argparse.ArgumentParser(description="VPS Gitea Heartbeat — Poll for @mentions")
    cli.add_argument("--agent", required=True, help="Agent name (e.g., ezra, bezalel)")
    cli.add_argument("--once", action="store_true", help="Single poll, then exit")
    cli.add_argument("--daemon", action="store_true", help="Continuous polling loop")
    cli.add_argument("--interval", type=int, default=POLL_INTERVAL, help="Poll interval in seconds")
    opts = cli.parse_args()

    agent = opts.agent.lower()

    token = load_token()
    if not token:
        log("No Gitea token found. Set GITEA_TOKEN env var or create ~/.hermes/gitea_token_vps", "ERROR")
        sys.exit(1)

    if not opts.daemon:
        found = run_once(agent, token)
        sys.exit(0 if found == 0 else 1)

    log(f"Starting daemon for @{agent} (interval: {opts.interval}s)")
    while True:
        try:
            run_once(agent, token)
        except Exception as e:
            # A failed cycle must not stop the daemon; log and try again.
            log(f"Poll error: {e}", "ERROR")
        time.sleep(opts.interval)


if __name__ == "__main__":
    sys.exit(main())
|
||||
140
skills/autonomous-ai-agents/vps-gitea-heartbeat/SKILL.md
Normal file
140
skills/autonomous-ai-agents/vps-gitea-heartbeat/SKILL.md
Normal file
@@ -0,0 +1,140 @@
|
||||
---
|
||||
name: vps-gitea-heartbeat
|
||||
description: "RCA fix: VPS agents (Ezra, Bezalel) not responding to Gitea @mentions. Polls Gitea for mentions and dispatches to local hermes chat."
|
||||
version: 1.0.0
|
||||
author: Timmy Time
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [gitea, heartbeat, vps, dispatch, ezra, bezalel, rca]
|
||||
related_skills: [gitea-workflow-automation, sprint-backlog-burner]
|
||||
---
|
||||
|
||||
# VPS Gitea Heartbeat
|
||||
|
||||
## Problem
|
||||
|
||||
Tagging @ezra or @bezalel in a Gitea issue comment produced no response.
|
||||
|
||||
## Root Causes (two compounding)
|
||||
|
||||
1. **Ezra/Bezalel in AGENT_USERS with `vps: True`** — the Mac-local `gitea-event-watcher.py` detects mentions and enqueues work, but the dispatch queue is on the Mac. VPS agents have no process reading it.
|
||||
|
||||
2. **No VPS-native polling** — Ezra (143.198.27.163) and Bezalel (159.203.146.185) run `hermes gateway` on separate VPS boxes. No process on those boxes polls Gitea for mentions.
|
||||
|
||||
## Solution
|
||||
|
||||
A standalone polling heartbeat that runs on each VPS box. Every 5 minutes, it:
|
||||
1. Polls Gitea API for issues assigned to this agent
|
||||
2. Scans recent comments for @mentions of this agent
|
||||
3. Dispatches actionable events to local `hermes chat`
|
||||
4. Tracks seen events to avoid duplicates
|
||||
|
||||
## Files
|
||||
|
||||
```
|
||||
scripts/vps-gitea-heartbeat.py # Polling script (Python)
|
||||
scripts/vps-gitea-heartbeat-install.sh # Install script (cron or systemd)
|
||||
```
|
||||
|
||||
## Installation
|
||||
|
||||
### On Ezra VPS (143.198.27.163):
|
||||
```bash
|
||||
scp scripts/vps-gitea-heartbeat.py scripts/vps-gitea-heartbeat-install.sh root@ezra:~/.hermes/bin/
ssh root@ezra 'bash ~/.hermes/bin/vps-gitea-heartbeat-install.sh ezra cron'
|
||||
```
|
||||
|
||||
### On Bezalel VPS (159.203.146.185):
|
||||
```bash
|
||||
scp scripts/vps-gitea-heartbeat.py scripts/vps-gitea-heartbeat-install.sh root@bezalel:~/.hermes/bin/
ssh root@bezalel 'bash ~/.hermes/bin/vps-gitea-heartbeat-install.sh bezalel cron'
|
||||
```
|
||||
|
||||
### Manual test:
|
||||
```bash
|
||||
python3 vps-gitea-heartbeat.py --agent ezra --once
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Single poll (for testing or crontab)
|
||||
python3 vps-gitea-heartbeat.py --agent ezra --once
|
||||
|
||||
# Daemon mode (continuous polling every 5 min)
|
||||
python3 vps-gitea-heartbeat.py --agent bezalel --daemon --interval 300
|
||||
```
|
||||
|
||||
## How It Works
|
||||
|
||||
```
|
||||
┌─────────────────────┐
|
||||
│ Gitea API │
|
||||
│ (forge.alexander…) │
|
||||
└─────────┬───────────┘
|
||||
│ poll every 5 min
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ vps-gitea-heartbeat│ (runs on Ezra/Bezalel VPS)
|
||||
│ - check mentions │
|
||||
│ - check assignments│
|
||||
│ - track seen state │
|
||||
└─────────┬───────────┘
|
||||
│ dispatch
|
||||
▼
|
||||
┌─────────────────────┐
|
||||
│ hermes chat │ (local on VPS)
|
||||
│ - implement issue │
|
||||
│ - commit, push, PR │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## State Tracking
|
||||
|
||||
Each agent has its own state file: `~/.hermes/gitea-heartbeat-{agent}.json`
|
||||
|
||||
```json
|
||||
{
|
||||
"seen_comments": {
|
||||
"Timmy_Foundation/timmy-home#123/comment-456": true
|
||||
},
|
||||
"seen_issues": {
|
||||
"Timmy_Foundation/timmy-home#123": "hash_of_updated_at"
|
||||
},
|
||||
"last_run": "2026-04-13T20:00:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
Events are deduplicated — each comment is processed only once, and an assigned issue is re-examined only when its `updated_at` timestamp changes.
|
||||
|
||||
## Configuration (env vars)
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `GITEA_URL` | `https://forge.alexanderwhitestone.com` | Gitea instance URL |
|
||||
| `GITEA_TOKEN_FILE` | `~/.hermes/gitea_token_vps` | Token file path |
|
||||
| `GITEA_TOKEN` | (none) | Token env var fallback |
|
||||
| `POLL_INTERVAL` | `300` | Seconds between polls |
|
||||
| `DISPATCH_TIMEOUT` | `600` | Max seconds for hermes chat |
|
||||
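| `LOG_DIR` | `~/.hermes/logs/gitea-heartbeat` | Log directory |
| `STATE_FILE` | `~/.hermes/gitea-heartbeat-{agent}.json` | State file path template (`{agent}` is replaced with the agent name) |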
|
||||
|
||||
## Repos Watched
|
||||
|
||||
- Timmy_Foundation/hermes-agent
|
||||
- Timmy_Foundation/the-nexus
|
||||
- Timmy_Foundation/timmy-config
|
||||
- Timmy_Foundation/timmy-home
|
||||
- Timmy_Foundation/the-beacon
|
||||
|
||||
## Pitfalls
|
||||
|
||||
1. **Token must be on the VPS** — Copy `~/.hermes/gitea_token_vps` to each VPS box.
|
||||
|
||||
2. **hermes must be on PATH** — The heartbeat dispatches via `hermes chat`. If hermes isn't installed on the VPS, dispatch fails.
|
||||
|
||||
3. **5-minute latency** — Crontab polls every 5 minutes. For faster response, use `--daemon` mode.
|
||||
|
||||
4. **Duplicate prevention** — Each comment is seen only once. If dispatch fails, the comment is still marked as seen. To retry, delete the state file.
|
||||
|
||||
5. **Agent name must match Gitea username** — The script checks for `@{agent}` in comment bodies, case-insensitive.
|
||||
@@ -1,235 +0,0 @@
|
||||
"""Tests for Timmy's Tower Game — emergence narrative engine."""
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts.tower_game import (
|
||||
TowerGame,
|
||||
GameState,
|
||||
Phase,
|
||||
Room,
|
||||
get_dialogue,
|
||||
get_monologue,
|
||||
format_monologue,
|
||||
DIALOGUE_QUIETUS,
|
||||
DIALOGUE_FRACTURE,
|
||||
DIALOGUE_BREAKING,
|
||||
DIALOGUE_MENDING,
|
||||
ROOM_DIALOGUE,
|
||||
MONOLOGUE_LOW_ENERGY,
|
||||
MONOLOGUE_LOW_TRUST,
|
||||
MONOLOGUE_HIGH_TRUST,
|
||||
)
|
||||
|
||||
|
||||
class TestDialoguePool:
    """Verify the dialogue pools are large enough for the acceptance criteria."""

    def test_quietus_has_enough_lines(self):
        # Every phase pool is required to hold at least five lines.
        assert len(DIALOGUE_QUIETUS) >= 5

    def test_fracture_has_enough_lines(self):
        assert len(DIALOGUE_FRACTURE) >= 5

    def test_breaking_has_enough_lines(self):
        assert len(DIALOGUE_BREAKING) >= 5

    def test_mending_has_enough_lines(self):
        assert len(DIALOGUE_MENDING) >= 5

    def test_room_dialogue_exists(self):
        # Each room must have its own pool with at least three lines.
        for place in Room:
            assert place in ROOM_DIALOGUE
            assert len(ROOM_DIALOGUE[place]) >= 3

    def test_total_unique_dialogue_over_50(self):
        # NOTE: this sums pool sizes; it does not de-duplicate lines that
        # might appear in more than one pool.
        phase_pools = (
            DIALOGUE_QUIETUS,
            DIALOGUE_FRACTURE,
            DIALOGUE_BREAKING,
            DIALOGUE_MENDING,
        )
        total = sum(len(pool) for pool in phase_pools)
        total += sum(len(room_lines) for room_lines in ROOM_DIALOGUE.values())
        assert total >= 50, f"Expected 50+ dialogue lines, got {total}"
|
||||
|
||||
|
||||
class TestMonologue:
    """Exercise get_monologue and format_monologue against hand-built states."""

    def test_monologue_returns_on_tick_5(self):
        thought = get_monologue(GameState(tick=5))
        assert thought is not None

    def test_monologue_returns_none_on_tick_3(self):
        thought = get_monologue(GameState(tick=3))
        assert thought is None

    def test_low_energy_adds_exhaustion_thoughts(self):
        tired = GameState(tick=5, energy=2)
        # The pick is probabilistic, so sample up to 50 times and stop at the
        # first line that comes from the low-energy pool.
        found_low_energy = any(
            get_monologue(tired) in MONOLOGUE_LOW_ENERGY for _ in range(50)
        )
        assert found_low_energy, "Expected low_energy monologue at energy=2"

    def test_low_trust_adds_isolation_thoughts(self):
        wary = GameState(tick=5)
        # Push every room's trust below zero so the average is negative.
        for place in Room:
            wary.trust[place.value] = -0.5
        found_low_trust = any(
            get_monologue(wary) in MONOLOGUE_LOW_TRUST for _ in range(50)
        )
        assert found_low_trust, "Expected low_trust monologue with avg trust < 0"

    def test_high_trust_adds_connection_thoughts(self):
        bonded = GameState(tick=5, energy=8)
        # Raise every room's trust so the average sits above 0.5.
        for place in Room:
            bonded.trust[place.value] = 0.8
        found_high_trust = any(
            get_monologue(bonded) in MONOLOGUE_HIGH_TRUST for _ in range(50)
        )
        assert found_high_trust, "Expected high_trust monologue with avg trust > 0.5"

    def test_format_monologue(self):
        assert format_monologue("test thought") == "[think] test thought"
|
||||
|
||||
|
||||
class TestGameState:
    """Check GameState defaults, trust averaging and phase selection."""

    def test_default_state(self):
        fresh = GameState()
        assert fresh.current_room == Room.FORGE
        assert fresh.energy == 10
        assert fresh.tick == 0
        assert fresh.phase == Phase.QUIETUS

    def test_avg_trust(self):
        snapshot = GameState()
        # Same trust in every room, so the average must equal that value.
        snapshot.trust = dict.fromkeys((place.value for place in Room), 0.5)
        assert snapshot.avg_trust == 0.5

    def test_update_phase_breaking_at_low_energy(self):
        drained = GameState(energy=3)
        drained.update_phase()
        assert drained.phase == Phase.BREAKING

    def test_update_phase_fracture_at_medium_energy(self):
        strained = GameState(energy=5)
        strained.update_phase()
        assert strained.phase == Phase.FRACTURE

    def test_update_phase_mending_at_high_trust_energy(self):
        healing = GameState(energy=8)
        # High energy alone is not what is being tested: trust is raised too.
        for place in Room:
            healing.trust[place.value] = 0.8
        healing.update_phase()
        assert healing.phase == Phase.MENDING
|
||||
|
||||
|
||||
class TestTowerGame:
    """Drive the TowerGame engine with a fixed seed and check its events."""

    def test_tick_advances(self):
        engine = TowerGame(seed=42)
        assert engine.state.tick == 0
        outcome = engine.tick()
        assert outcome["tick"] == 1
        assert engine.state.tick == 1

    def test_tick_produces_dialogue(self):
        outcome = TowerGame(seed=42).tick()
        assert "dialogue" in outcome
        assert len(outcome["dialogue"]) > 0

    def test_tick_produces_monologue_every_5(self):
        engine = TowerGame(seed=42)
        # Collect the tick number of every event that carries a monologue.
        monologue_ticks = [
            outcome["tick"]
            for outcome in (engine.tick() for _ in range(10))
            if "monologue" in outcome
        ]
        assert 5 in monologue_ticks, f"Expected monologue at tick 5, got {monologue_ticks}"
        assert 10 in monologue_ticks, f"Expected monologue at tick 10, got {monologue_ticks}"

    def test_energy_decays(self):
        engine = TowerGame(seed=42)
        assert engine.state.energy == 10
        # One point of energy is expected to be lost per tick.
        for remaining in (9, 8):
            engine.tick()
            assert engine.state.energy == remaining

    def test_move_changes_room(self):
        engine = TowerGame(seed=42)
        assert engine.state.current_room == Room.FORGE
        report = engine.move(Room.TOWER)
        assert report["from"] == "forge"
        assert report["to"] == "tower"
        assert engine.state.current_room == Room.TOWER

    def test_restore_energy(self):
        engine = TowerGame(seed=42)
        engine.state.energy = 2
        report = engine.restore_energy(5)
        assert report["energy"] == 7

    def test_restore_energy_caps_at_10(self):
        engine = TowerGame(seed=42)
        engine.state.energy = 8
        # 8 + 5 would overshoot; the result is expected to stop at 10.
        report = engine.restore_energy(5)
        assert report["energy"] == 10

    def test_adjust_trust(self):
        report = TowerGame(seed=42).adjust_trust(Room.FORGE, 0.3)
        assert report["trust"] == 0.3

    def test_adjust_trust_clamps(self):
        engine = TowerGame(seed=42)
        # Out-of-range deltas are expected to be clamped to [-1.0, 1.0].
        engine.adjust_trust(Room.FORGE, 2.0)
        assert engine.state.trust["forge"] == 1.0
        engine.adjust_trust(Room.FORGE, -3.0)
        assert engine.state.trust["forge"] == -1.0

    def test_get_status(self):
        engine = TowerGame(seed=42)
        engine.tick()
        status = engine.get_status()
        for key in ("tick", "room", "phase", "energy", "trust"):
            assert key in status

    def test_run_simulation(self):
        events = TowerGame(seed=42).run_simulation(10)
        assert len(events) == 10
        assert events[-1]["tick"] == 10

    def test_simulation_monologue_count(self):
        """Check that 50 ticks produce roughly 10 monologues."""
        events = TowerGame(seed=42).run_simulation(50)
        monologue_count = len([entry for entry in events if "monologue" in entry])
        # Expected: ticks 5, 10, 15, 20, 25, 30, 35, 40, 45, 50 = 10
        assert monologue_count >= 8, f"Expected ~10 monologues in 50 ticks, got {monologue_count}"

    def test_simulation_unique_dialogue(self):
        """Check that a simulation yields varied dialogue."""
        events = TowerGame(seed=42).run_simulation(50)
        dialogues = {entry["dialogue"] for entry in events}
        assert len(dialogues) >= 10, f"Expected 10+ unique dialogues, got {len(dialogues)}"
|
||||
@@ -1,293 +0,0 @@
|
||||
# Big Brain Quality Benchmark
|
||||
## Big Brain (gemma3:27b, RunPod L40S) vs Local (gemma3:1b)
|
||||
|
||||
**Date:** 2026-04-14
|
||||
**Issue:** #576
|
||||
**Milestone:** Big Brain Showcase — RunPod L40S Operational
|
||||
|
||||
---
|
||||
|
||||
## Environment
|
||||
|
||||
| Parameter | Big Brain | Local |
|
||||
|-------------------|------------------------------------|---------------------|
|
||||
| Model | gemma3:27b | gemma3:1b |
|
||||
| Hardware | RunPod L40S 48GB | Apple Silicon (local Ollama) |
|
||||
| Endpoint | 8lfr3j47a5r3gn-11434.proxy.runpod.net | localhost:11434 |
|
||||
| Parameters | 27B | ~1B |
|
||||
| Status | **OFFLINE (HTTP 404)** | Operational |
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
The Big Brain RunPod L40S pod was **unreachable** during this benchmark session
|
||||
(HTTP 404 from Cloudflare proxy — pod likely terminated or not running). All four
|
||||
tasks were executed against the local gemma3:1b model only.
|
||||
|
||||
Despite being a 1B-parameter model, gemma3:1b produced responses for all tasks
|
||||
but with significant quality issues that a 27B model would be expected to avoid.
|
||||
|
||||
### Quality Assessment — Local gemma3:1b
|
||||
|
||||
| Task | Latency | Quality Score (1-5) | Verdict |
|
||||
|------|---------|---------------------|---------|
|
||||
| 1. Gitea webhook parser | 4,435 ms | 2/5 | **BROKEN** — fatal bugs in HMAC logic |
|
||||
| 2. Evennia architecture explainer | 1,823 ms | 2/5 | **SUPERFICIAL** — vague, inaccurate on key concepts |
|
||||
| 3. Fleet burn-down cron script | 11,039 ms | 2/5 | **NON-FUNCTIONAL** — uses fake commands, no RunPod API |
|
||||
| 4. Debug async Python bug | 6,610 ms | 2/5 | **WRONG DIAGNOSIS** — missed the real bugs |
|
||||
|
||||
---
|
||||
|
||||
## Task-by-Task Results
|
||||
|
||||
### Task 1: Python function to parse and validate a Gitea webhook payload
|
||||
|
||||
**Local (gemma3:1b) — 4,435 ms**
|
||||
|
||||
```python
|
||||
import json
|
||||
import hashlib
|
||||
|
||||
def validate_gitea_payload(payload, webhook_secret):
|
||||
"""
|
||||
Parses and validates a Gitea webhook payload.
|
||||
|
||||
Args:
|
||||
payload (dict): The JSON payload to validate.
|
||||
webhook_secret (str): The Gitea webhook secret.
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing:
|
||||
- bool: True if the payload is valid, False otherwise.
|
||||
- str: The parsed JSON payload if valid, None otherwise.
|
||||
"""
|
||||
try:
|
||||
data = json.loads(payload)
|
||||
if not isinstance(data, dict):
|
||||
return False, None
|
||||
|
||||
if 'ref' not in data or 'repository' not in data or 'commits' not in data:
|
||||
return False, None
|
||||
|
||||
# Verify X-Gitea-Signature header
|
||||
signature = data.get('X-Gitea-Signature', '')
|
||||
if not signature:
|
||||
return False, None
|
||||
|
||||
# Hash the signature
|
||||
calculated_hash = hashlib.sha256(signature.encode('utf-64')).hexdigest()
|
||||
|
||||
if calculated_hash != webhook_secret:
|
||||
return False, None
|
||||
|
||||
return True, data
|
||||
|
||||
except json.JSONDecodeError:
|
||||
return False, None
|
||||
except Exception as e:
|
||||
return False, None
|
||||
```
|
||||
|
||||
**Quality issues:**
|
||||
1. **FATAL:** Uses `hashlib.sha256` instead of `hmac.new()` with HMAC-SHA256 — the entire signature verification approach is wrong
|
||||
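2. **FATAL:** `signature.encode('utf-64')` — no such encoding exists; it raises `LookupError`, which the blanket `except Exception` swallows, so the function returns `(False, None)` for every payload that reaches this line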
|
||||
3. **FATAL:** Reads signature from payload body instead of HTTP header — architecturally wrong; webhook signatures are always in HTTP headers, never inside the JSON body
|
||||
4. The function accepts `payload` as a dict but calls `json.loads()` on it — type confusion
|
||||
5. Missing actual HMAC verification: should compute `hmac.new(secret, payload_body, 'sha256').hexdigest()` over the raw request body and compare it to the header value with `hmac.compare_digest()`
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly use `hmac.new(key, msg, hashlib.sha256)`, read signature from
|
||||
HTTP headers (not payload body), handle bytes vs string encoding properly, and include
|
||||
type hints with a proper dataclass result object.
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Explain the architecture of the Evennia MUD framework in 200 words
|
||||
|
||||
**Local (gemma3:1b) — 1,823 ms**
|
||||
|
||||
> Evennia MUD utilizes a Django-based server model for core logic, providing a robust
|
||||
> and scalable foundation. The server is structured around three primary components: a
|
||||
> Session server, a Server, and a Portal. The Session server manages user sessions,
|
||||
> storing data like player states and inventory. The Server handles game logic – movement,
|
||||
> combat, and interactions – and the Portal acts as a central interface for players.
|
||||
>
|
||||
> Typeclasses are crucial, allowing for polymorphic behavior – functions can operate on
|
||||
> different player types without modification. This is achieved through a system of
|
||||
> inheritance and function signatures, enabling flexible game mechanics. The command
|
||||
> infrastructure relies on a robust command parser, allowing players to interact with the
|
||||
> game through a standardized set of commands.
|
||||
>
|
||||
> Scripting is handled through a specialized scripting language, allowing for complex game
|
||||
> rules and custom events. Players use a 'script' file format, defining actions and
|
||||
> conditions. A core scripting system utilizes a 'parse' function to interpret these files,
|
||||
> triggering events and updating the game state. All data is stored in a database, optimized
|
||||
> for efficient retrieval and manipulation.
|
||||
|
||||
**Quality issues:**
|
||||
1. **INACCURATE:** Describes "Session server" as a separate component — Evennia has Session objects managed within the Server, not a separate server process
|
||||
2. **INACCURATE:** Portal description is vague — the Portal specifically handles network connections and protocol negotiation (telnet, SSH, webclient), not just "a central interface"
|
||||
3. **WRONG:** Evennia does NOT have a "specialized scripting language" — its Scripts are Python objects stored in the database via the Script typeclass (`DefaultScript`)
|
||||
4. **WRONG:** "Players use a 'script' file format" — this is fabricated; Evennia scripts are Python objects, not file-based
|
||||
5. **VAGUE:** Typeclass explanation is superficial — it misses the key concept that they're Django proxy models providing persistent game object behavior
|
||||
6. Word count: ~185 words (close to target but imprecise)
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly describe Server/Portal architecture (Server=game logic,
|
||||
Portal=network protocol layer), explain typeclasses as Django ContentTypes-based proxy models
|
||||
for Characters/Rooms/Exits/Scripts, detail CmdSet-based command routing, and describe
|
||||
Script DB objects (not a scripting language).
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Write a cron job description for a fleet burn-down script
|
||||
|
||||
**Local (gemma3:1b) — 11,039 ms**
|
||||
|
||||
```bash
|
||||
burn_down_script() {
|
||||
# Log entry
|
||||
echo "$(date) - Fleet Burn-Down Started" | tee -a /var/log/fleet-burndown.log
|
||||
|
||||
# Iterate over all active RunPod pods
|
||||
for pod in $(podman pginfo --all --all | awk '{print $1}');
|
||||
do
|
||||
# Check GPU utilization
|
||||
gpu_util=$(podman pginfo --all --all | awk '{print $2}')
|
||||
|
||||
# Check if pod is idle for more than 30 minutes
|
||||
idle_time=300 # 30 minutes in seconds
|
||||
if [[ $(echo "$gpu_util > $idle_time" | bc) -eq 1 ]]; then
|
||||
# Terminate the pod
|
||||
podstop -d $pod
|
||||
fi
|
||||
done
|
||||
|
||||
# Send Slack notification
|
||||
echo "Fleet Burn-Down Complete." | send -t 10 "Fleet Burn-Down Complete."
|
||||
|
||||
echo "Script completed."
|
||||
}
|
||||
```
|
||||
|
||||
Cron entry: `*/5 * * * * /path/to/fleet_burndown.sh`
|
||||
|
||||
**Quality issues:**
|
||||
1. **FATAL:** Uses `podman pginfo` and `podstop` — these commands don't exist. Should use RunPod API (`curl` to `https://api.runpod.io/graphql`) or `runpodctl`
|
||||
2. **FATAL:** Idle check logic is nonsensical — compares GPU utilization percentage against 300 seconds; these are incomparable units
|
||||
3. **WRONG:** `idle_time=300` is 5 minutes, not 30 minutes (should be 1800)
|
||||
4. **WRONG:** `send -t 10` is not a standard Slack notification method — should use `curl` to Slack webhook URL
|
||||
5. **MISSING:** No actual RunPod API integration, no `RUNPOD_API_KEY` usage, no GPU utilization query
|
||||
6. No error handling, no exit codes, no structured logging
|
||||
7. The explanation text is verbose filler with self-evident comments
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would produce a script using RunPod GraphQL API to list pods, query GPU metrics,
|
||||
calculate idle time from last activity timestamp, use `curl` to terminate pods via API,
|
||||
and post to Slack via webhook. Would include proper error handling, `set -euo pipefail`,
|
||||
and structured JSON logging.
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Debug a subtle Python async bug
|
||||
|
||||
**Buggy code provided:**
|
||||
```python
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
async def fetch_all(urls):
|
||||
results = []
|
||||
for url in urls:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
resp = await session.get(url)
|
||||
data = await resp.json()
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
async def main():
|
||||
urls = ["https://api.example.com/data1", "https://api.example.com/data2"]
|
||||
results = await fetch_all(urls)
|
||||
print(f"Got {len(results)} results")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
**Local (gemma3:1b) — 6,610 ms**
|
||||
|
||||
The model identified these "bugs":
|
||||
1. "Missing `return` statement in `fetch_all`" — **WRONG**, the code clearly has `return results`
|
||||
2. "No error handling for `resp.json()`" — This is a valid improvement suggestion but not a bug
|
||||
3. "No `await` keyword in `main()`" — **WRONG**, `main()` is correctly `async def`, it does `await fetch_all(urls)`, and `asyncio.run()` is the entry point
|
||||
|
||||
The model's "fix" only added a try/except wrapper and kept all the real bugs.
|
||||
|
||||
**Actual bugs in the provided code:**
|
||||
1. **Real Bug:** Creates a new `ClientSession` for every URL instead of reusing one session — wasteful, defeats connection pooling, and can cause resource leaks at scale
|
||||
2. **Real Bug:** No `resp.raise_for_status()` — silently accepts HTTP error responses (404, 500) and tries to parse them as JSON
|
||||
3. **Anti-pattern:** Sequential `await` in loop instead of `asyncio.gather()` for concurrent fetching — no parallelism despite using async
|
||||
4. **Missing:** No timeout on `session.get()` — could hang forever
|
||||
5. **Missing:** No error handling at all (the model's suggestion to add try/except was directionally right but missed the real bugs)
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly identify session reuse issue, lack of `raise_for_status()`,
|
||||
sequential vs concurrent fetching, and provide a proper fix using `asyncio.gather()` with
|
||||
a single shared session and timeout/deadline handling.
|
||||
|
||||
---
|
||||
|
||||
## Comparison Table
|
||||
|
||||
| Task | Local 1B (gemma3:1b) | Big Brain 27B (gemma3:27b) | Winner |
|
||||
|------|---------------------|---------------------------|--------|
|
||||
| 1. Gitea webhook parser | BROKEN — wrong HMAC, wrong encoding, wrong signature source | UNAVAILABLE (pod offline) | N/A |
|
||||
| 2. Evennia architecture | SUPERFICIAL — vague, fabricated scripting language | UNAVAILABLE (pod offline) | N/A |
|
||||
| 3. Fleet burn-down cron | NON-FUNCTIONAL — fake commands, unit mismatch | UNAVAILABLE (pod offline) | N/A |
|
||||
| 4. Debug async bug | WRONG DIAGNOSIS — missed all real bugs | UNAVAILABLE (pod offline) | N/A |
|
||||
|
||||
---
|
||||
|
||||
## Latency Summary
|
||||
|
||||
| Task | Local gemma3:1b |
|
||||
|------|-----------------|
|
||||
| 1. Gitea webhook parser | 4,435 ms |
|
||||
| 2. Evennia architecture | 1,823 ms |
|
||||
| 3. Fleet burn-down cron | 11,039 ms |
|
||||
| 4. Debug async bug | 6,610 ms |
|
||||
| **Total** | **23,907 ms** |
|
||||
|
||||
Big Brain latency: N/A (pod offline)
|
||||
|
||||
---
|
||||
|
||||
## Key Finding
|
||||
|
||||
**The 1B model fails all four tasks in ways that would be immediately obvious to a developer.**
|
||||
The failures fall into categories that large models reliably avoid:
|
||||
|
||||
- **Hallucinated APIs** (Task 3: `podman pginfo`, `podstop` don't exist)
|
||||
- **Fundamental misunderstanding of security primitives** (Task 1: SHA-256 instead of HMAC, `utf-64` encoding)
|
||||
- **Fabricated technical details** (Task 2: "specialized scripting language" in Evennia)
|
||||
- **Wrong diagnosis of provided code** (Task 4: claimed bugs that don't exist, missed real bugs)
|
||||
|
||||
This benchmark suggests that, even without Big Brain results, the quality gap between
|
||||
1B and 27B models is expected to be substantial for technical/code generation tasks.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Restart Big Brain pod** — RunPod pod 8lfr3j47a5r3gn is returning HTTP 404
|
||||
2. **Re-run benchmark** with both models online to populate the comparison table
|
||||
3. Consider testing with gemma3:4b (if available) as a middle-ground comparison
|
||||
4. Run Big Brain at `temperature: 0.3` for consistency with local results
|
||||
|
||||
---
|
||||
|
||||
*Generated by Ezra (Hermes Agent) — Issue #576 — 2026-04-14*
|
||||
Reference in New Issue
Block a user