Compare commits
2 Commits
fix/692
...
fix/640-vp
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
62f0f48006 | ||
|
|
038f1ab7f4 |
116
scripts/vps-gitea-heartbeat-install.sh
Normal file
116
scripts/vps-gitea-heartbeat-install.sh
Normal file
@@ -0,0 +1,116 @@
|
||||
#!/bin/bash
# ============================================================================
# VPS Gitea Heartbeat Installer
# ============================================================================
#
# Installs the Gitea heartbeat poller as a cron job or systemd service.
#
# Usage:
#   bash vps-gitea-heartbeat-install.sh [agent] [cron|systemd]
#
# Examples:
#   bash vps-gitea-heartbeat-install.sh ezra cron
#   bash vps-gitea-heartbeat-install.sh bezalel systemd
# ============================================================================

set -euo pipefail

# Positional args: agent name (required), install mode (default: cron).
AGENT="${1:?Usage: $0 AGENT [cron|systemd]}"
MODE="${2:-cron}"
SCRIPT_PATH="${HOME}/.hermes/bin/vps-gitea-heartbeat.py"
INTERVAL=300 # 5 minutes — used by the systemd daemon; cron mode uses */5

# ANSI colors for readable terminal output.
GREEN='\033[0;32m'
CYAN='\033[0;36m'
RED='\033[0;31m'
NC='\033[0m'

echo -e "${CYAN}VPS Gitea Heartbeat Installer${NC}"
echo -e " Agent: ${AGENT}"
echo -e " Mode: ${MODE}"
echo ""

# Ensure script exists
if [ ! -f "$SCRIPT_PATH" ]; then
    echo -e "${RED}Error: ${SCRIPT_PATH} not found${NC}"
    echo "Copy vps-gitea-heartbeat.py to ~/.hermes/bin/ first."
    exit 1
fi

# Ensure token exists (primary location first, then alternate).
TOKEN_FILE="${HOME}/.hermes/gitea_token_vps"
if [ ! -f "$TOKEN_FILE" ]; then
    # Try alternate location
    TOKEN_FILE="${HOME}/.config/gitea/token"
    if [ ! -f "$TOKEN_FILE" ]; then
        echo -e "${RED}Error: No Gitea token found${NC}"
        echo "Create ~/.hermes/gitea_token_vps with your Gitea API token."
        exit 1
    fi
fi

echo -e "${GREEN}✓${NC} Token found"

# Create log directory
mkdir -p "${HOME}/.hermes/logs/gitea-heartbeat"

if [ "$MODE" = "cron" ]; then
    # ── Cron installation ────────────────────────────────────────────────
    CRON_LINE="*/5 * * * * /usr/bin/python3 ${SCRIPT_PATH} --agent ${AGENT} --once >> ${HOME}/.hermes/logs/gitea-heartbeat/cron-${AGENT}.log 2>&1"

    # Check if already installed
    if crontab -l 2>/dev/null | grep -q "vps-gitea-heartbeat.*--agent ${AGENT}"; then
        echo -e "${CYAN}Cron job already exists for ${AGENT}, updating...${NC}"
        # Remove old entry.
        # FIX: under `set -o pipefail`, `grep -v` exits 1 when it filters out
        # every line (i.e. the heartbeat entry was the only crontab line),
        # which previously aborted the whole script here. `|| true` makes the
        # filter best-effort so an empty result simply clears the crontab.
        { crontab -l 2>/dev/null | grep -v "vps-gitea-heartbeat.*--agent ${AGENT}" || true; } | crontab -
    fi

    # Add new entry (subshell exit status is the final `echo`, so a missing
    # crontab does not trip `set -e`).
    (crontab -l 2>/dev/null; echo "$CRON_LINE") | crontab -
    echo -e "${GREEN}✓${NC} Cron job installed (every 5 minutes)"
    echo ""
    echo "Verify with: crontab -l"

elif [ "$MODE" = "systemd" ]; then
    # ── Systemd installation ─────────────────────────────────────────────
    SERVICE_NAME="gitea-heartbeat-${AGENT}"
    SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service"

    # Stage the unit in /tmp; installing into /etc requires root, so the
    # privileged commands are printed for the operator instead of run here.
    # NOTE(review): unit runs as User=root but inherits this user's ${HOME}
    # via Environment= — confirm that is intentional.
    cat > "/tmp/${SERVICE_NAME}.service" << EOF
[Unit]
Description=Gitea Heartbeat for ${AGENT}
After=network.target

[Service]
Type=simple
ExecStart=/usr/bin/python3 ${SCRIPT_PATH} --agent ${AGENT} --daemon --interval ${INTERVAL}
Restart=always
RestartSec=30
User=root
Environment=HOME=${HOME}

[Install]
WantedBy=multi-user.target
EOF

    echo "Installing systemd service..."
    echo -e "${CYAN}Run as root:${NC}"
    echo " sudo cp /tmp/${SERVICE_NAME}.service ${SERVICE_FILE}"
    echo " sudo systemctl daemon-reload"
    echo " sudo systemctl enable ${SERVICE_NAME}"
    echo " sudo systemctl start ${SERVICE_NAME}"
    echo ""
    echo "Or copy-paste:"
    echo " sudo bash -c 'cat > ${SERVICE_FILE}' < /tmp/${SERVICE_NAME}.service && sudo systemctl daemon-reload && sudo systemctl enable --now ${SERVICE_NAME}"

else
    echo -e "${RED}Unknown mode: ${MODE}${NC}"
    echo "Use 'cron' or 'systemd'"
    exit 1
fi

echo ""
echo -e "${GREEN}✓ Installation complete${NC}"
echo ""
echo "Test with:"
echo " python3 ${SCRIPT_PATH} --agent ${AGENT} --once --verbose"
|
||||
369
scripts/vps-gitea-heartbeat.py
Normal file
369
scripts/vps-gitea-heartbeat.py
Normal file
@@ -0,0 +1,369 @@
|
||||
#!/usr/bin/env python3
"""VPS Gitea Heartbeat — Poll Gitea for @mentions and issue assignments.

Runs on Ezra/Bezalel VPS boxes. Every N minutes:
1. Polls Gitea API for issues assigned to this agent
2. Scans recent comments for @mentions of this agent
3. Dispatches actionable events to local `hermes chat`
4. Tracks seen events to avoid duplicates

Usage:
    python3 vps-gitea-heartbeat.py --agent ezra --once       # Single poll
    python3 vps-gitea-heartbeat.py --agent bezalel --daemon  # Continuous mode

Ref: #579, #640
"""

from __future__ import annotations

import argparse
import json
import logging
import os
import subprocess
import sys
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
import urllib.request
import urllib.error

logger = logging.getLogger(__name__)

# Configuration
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"  # Gitea REST API root
DEFAULT_INTERVAL = 300  # 5 minutes between polls in daemon mode
STATE_DIR = Path.home() / ".hermes"  # per-agent dedup state lives here
LOG_DIR = STATE_DIR / "logs" / "gitea-heartbeat"  # JSONL event logs, one file per day

# Repos to scan for mentions (and assignments)
WATCHED_REPOS = [
    "Timmy_Foundation/timmy-home",
    "Timmy_Foundation/hermes-agent",
    "Timmy_Foundation/timmy-config",
    "Timmy_Foundation/the-beacon",
]
|
||||
|
||||
|
||||
def load_gitea_token() -> str:
    """Return the Gitea API token from the first readable standard location.

    Checks well-known token files in priority order; if none exists, falls
    back to the GITEA_TOKEN environment variable (may be the empty string).
    """
    candidates = (
        Path.home() / ".config" / "gitea" / "token",
        Path.home() / ".hermes" / "gitea_token_vps",
        Path.home() / ".hermes" / "gitea_token",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate.read_text().strip()
    return os.environ.get("GITEA_TOKEN", "")
|
||||
|
||||
|
||||
def gitea_request(path: str, token: str, method: str = "GET", data: Any = None) -> Any:
    """Issue one Gitea API call; return parsed JSON, or None on any failure.

    *path* is appended to GITEA_BASE. A truthy *data* payload is sent as a
    JSON body. All errors are logged and swallowed so pollers stay alive.
    """
    request = urllib.request.Request(
        f"{GITEA_BASE}{path}",
        data=json.dumps(data).encode() if data else None,
        headers={
            "Authorization": f"token {token}",
            "Content-Type": "application/json",
        },
        method=method,
    )

    try:
        with urllib.request.urlopen(request, timeout=30) as resp:
            return json.loads(resp.read())
    except urllib.error.HTTPError as e:
        logger.warning(f"Gitea API error {e.code}: {path}")
        return None
    except Exception as e:
        logger.warning(f"Gitea request failed: {e}")
        return None
|
||||
|
||||
|
||||
class HeartbeatState:
    """Persistent per-agent state used to deduplicate events across polls.

    Seen comment IDs and issue keys are persisted to a JSON file under
    STATE_DIR so restarts and repeated cron runs never re-dispatch the
    same event.
    """

    def __init__(self, agent: str):
        self.agent = agent
        self.state_file = STATE_DIR / f"gitea-heartbeat-{agent}.json"
        self.seen_comment_ids: Set[str] = set()
        self.seen_issue_hashes: Set[str] = set()
        self.last_poll: str = ""
        self._load()

    def _load(self):
        """Restore state from disk; a missing or corrupt file starts fresh."""
        if not self.state_file.exists():
            return
        try:
            raw = json.loads(self.state_file.read_text())
            self.seen_comment_ids = set(raw.get("seen_comment_ids", []))
            self.seen_issue_hashes = set(raw.get("seen_issue_hashes", []))
            self.last_poll = raw.get("last_poll", "")
        except Exception as e:
            logger.warning(f"Failed to load state: {e}")

    def save(self):
        """Persist current state as pretty-printed JSON."""
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        snapshot = {
            "agent": self.agent,
            "seen_comment_ids": sorted(self.seen_comment_ids),
            "seen_issue_hashes": sorted(self.seen_issue_hashes),
            "last_poll": self.last_poll,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        self.state_file.write_text(json.dumps(snapshot, indent=2))

    def is_comment_seen(self, comment_id: str) -> bool:
        return comment_id in self.seen_comment_ids

    def mark_comment_seen(self, comment_id: str):
        self.seen_comment_ids.add(comment_id)
        # Bound memory: past 10k IDs, keep only the 5k largest under string
        # sort (for numeric-string IDs this approximates "newest" — the sort
        # is lexicographic, not numeric).
        if len(self.seen_comment_ids) > 10000:
            self.seen_comment_ids = set(sorted(self.seen_comment_ids)[-5000:])

    def is_issue_seen(self, issue_key: str) -> bool:
        return issue_key in self.seen_issue_hashes

    def mark_issue_seen(self, issue_key: str):
        self.seen_issue_hashes.add(issue_key)
|
||||
|
||||
|
||||
def poll_assigned_issues(
    agent: str, token: str, state: HeartbeatState
) -> List[Dict[str, Any]]:
    """Check watched repos for open issues assigned to this agent.

    Returns a list of "assignment" event dicts for issues not previously
    seen; each new issue key is recorded in *state* so the event fires once.
    (FIX: removed the unused `mention_tag` local — a copy-paste leftover
    from poll_mentions.)
    """
    events: List[Dict[str, Any]] = []

    for repo in WATCHED_REPOS:
        # Get open issues (first page only, 20 most recent)
        issues = gitea_request(f"/repos/{repo}/issues?state=open&limit=20", token)
        if not issues:
            continue

        for issue in issues:
            # Check if assigned to this agent (case-insensitive login match)
            assignee = issue.get("assignee", {})
            if assignee and assignee.get("login", "").lower() == agent.lower():
                issue_key = f"{repo}#{issue['number']}"
                if not state.is_issue_seen(issue_key):
                    events.append({
                        "type": "assignment",
                        "repo": repo,
                        "issue_number": issue["number"],
                        "title": issue.get("title", ""),
                        "url": issue.get("html_url", ""),
                        "issue_key": issue_key,
                    })
                    state.mark_issue_seen(issue_key)

    return events
|
||||
|
||||
|
||||
def poll_mentions(
    agent: str, token: str, state: HeartbeatState
) -> List[Dict[str, Any]]:
    """Scan recent comments in watched repos for @mentions of this agent."""
    found: List[Dict[str, Any]] = []
    needle = f"@{agent}".lower()  # case-insensitive mention marker

    for repo in WATCHED_REPOS:
        # Only the most recently updated open issues — keeps API traffic bounded.
        recent = gitea_request(
            f"/repos/{repo}/issues?state=open&limit=10&sort=updated", token
        )
        if not recent:
            continue

        for issue in recent:
            number = issue["number"]
            thread = gitea_request(
                f"/repos/{repo}/issues/{number}/comments?limit=10", token
            )
            if not thread:
                continue

            for note in thread:
                note_id = str(note.get("id", ""))
                if state.is_comment_seen(note_id):
                    continue

                text = note.get("body", "")
                if needle in text.lower():
                    found.append({
                        "type": "mention",
                        "repo": repo,
                        "issue_number": number,
                        "comment_id": note_id,
                        "body_preview": text[:200],
                        "author": note.get("user", {}).get("login", ""),
                        "url": note.get("html_url", ""),
                    })

                # Every scanned comment is marked seen — mention or not — so
                # the same comment is never re-examined on later polls.
                state.mark_comment_seen(note_id)

    return found
|
||||
|
||||
|
||||
def dispatch_to_hermes(agent: str, event: Dict[str, Any]) -> bool:
    """Hand one event to the local `hermes chat` CLI.

    Builds a natural-language prompt from the event and invokes hermes with a
    session key derived from agent + issue number. Returns True only when the
    hermes process exits 0; all failures are logged and reported as False.
    """
    event_type = event["type"]
    repo = event["repo"]
    issue_num = event["issue_number"]
    title = event.get("title", "")

    # Pick the prompt wording by event type.
    if event_type == "mention":
        prompt = (
            f"You were @mentioned on {repo}#{issue_num}. "
            f"Check the issue and respond. URL: {event.get('url', '')}"
        )
    elif event_type == "assignment":
        prompt = (
            f"You have been assigned {repo}#{issue_num}: {title}. "
            f"Implement the fix, commit, push, and open a PR. "
            f"URL: {event.get('url', '')}"
        )
    else:
        prompt = f"New event on {repo}#{issue_num}: {event_type}"

    try:
        # Dispatch via hermes chat CLI; one session per (agent, issue).
        completed = subprocess.run(
            ["hermes", "chat", "--quiet", prompt],
            capture_output=True,
            text=True,
            timeout=600,
            env={**os.environ, "HERMES_SESSION_KEY": f"gitea-{agent}-{issue_num}"},
        )
        if completed.returncode == 0:
            logger.info(f"Dispatched {event_type} for {repo}#{issue_num} to hermes")
            return True
        logger.warning(f"hermes chat failed: {completed.stderr[:200]}")
        return False
    except FileNotFoundError:
        # hermes not in PATH — try direct path
        # NOTE(review): the log implies a fallback to ~/.hermes/bin/ but none
        # is attempted; confirm whether that fallback should exist.
        logger.warning("hermes CLI not found, trying ~/.hermes/bin/")
        return False
    except subprocess.TimeoutExpired:
        logger.warning(f"hermes chat timed out for {repo}#{issue_num}")
        return False
    except Exception as e:
        logger.warning(f"Dispatch failed: {e}")
        return False
|
||||
|
||||
|
||||
def log_event(agent: str, event: Dict[str, Any], dispatched: bool):
    """Append the event as one JSON line to today's per-agent log file."""
    LOG_DIR.mkdir(parents=True, exist_ok=True)
    # One log file per agent per calendar day (local date in the filename).
    log_file = LOG_DIR / f"{agent}-{datetime.now().strftime('%Y%m%d')}.log"

    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "agent": agent,
        "dispatched": dispatched,
        **event,
    }

    with open(log_file, "a") as fh:
        fh.write(json.dumps(record, ensure_ascii=False) + "\n")
|
||||
|
||||
|
||||
def run_poll(agent: str, token: str) -> int:
    """Run a single poll cycle. Returns number of events dispatched."""
    state = HeartbeatState(agent)
    state.last_poll = datetime.now(timezone.utc).isoformat()

    # Gather assignment events first, then mention events.
    pending = poll_assigned_issues(agent, token, state)
    pending += poll_mentions(agent, token, state)

    if not pending:
        logger.debug(f"No new events for @{agent}")
        state.save()
        return 0

    logger.info(f"Found {len(pending)} new event(s) for @{agent}")

    # Dispatch each event and log the outcome; count the successes.
    dispatched = 0
    for event in pending:
        ok = dispatch_to_hermes(agent, event)
        log_event(agent, event, ok)
        if ok:
            dispatched += 1

    # State is saved even on a quiet cycle so last_poll stays current.
    state.save()
    return dispatched
|
||||
|
||||
|
||||
def run_daemon(agent: str, token: str, interval: int):
    """Poll forever, sleeping *interval* seconds between cycles."""
    logger.info(f"Starting daemon for @{agent} (interval: {interval}s)")
    while True:
        try:
            run_poll(agent, token)
        except Exception as exc:
            # A failed cycle must never kill the daemon; log and keep going.
            logger.error(f"Poll error: {exc}")
        time.sleep(interval)
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse args, configure logging, run once or as daemon."""
    cli = argparse.ArgumentParser(
        description="VPS Gitea Heartbeat — Poll for @mentions and assignments"
    )
    cli.add_argument(
        "--agent", "-a",
        required=True,
        help="Agent name (ezra, bezalel, etc.)",
    )
    cli.add_argument(
        "--once",
        action="store_true",
        help="Run single poll and exit",
    )
    cli.add_argument(
        "--daemon", "-d",
        action="store_true",
        help="Run in daemon mode",
    )
    cli.add_argument(
        "--interval", "-i",
        type=int,
        default=DEFAULT_INTERVAL,
        help=f"Poll interval in seconds (default: {DEFAULT_INTERVAL})",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    args = cli.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    # Load token; bail out early if no credential source is available.
    token = load_gitea_token()
    if not token:
        print("Error: No Gitea token found", file=sys.stderr)
        sys.exit(1)

    agent = args.agent.lower()

    # NOTE: --once is the implicit default — any non-daemon invocation polls once.
    if args.daemon:
        run_daemon(agent, token, args.interval)
    else:
        dispatched = run_poll(agent, token)
        print(f"Poll complete: {dispatched} event(s) dispatched")


if __name__ == "__main__":
    main()
|
||||
293
timmy-config/docs/big-brain-benchmark.md
Normal file
293
timmy-config/docs/big-brain-benchmark.md
Normal file
@@ -0,0 +1,293 @@
|
||||
# Big Brain Quality Benchmark
|
||||
## Big Brain (gemma3:27b, RunPod L40S) vs Local (gemma3:1b)
|
||||
|
||||
**Date:** 2026-04-14
|
||||
**Issue:** #576
|
||||
**Milestone:** Big Brain Showcase — RunPod L40S Operational
|
||||
|
||||
---
|
||||
|
||||
## Environment
|
||||
|
||||
| Parameter | Big Brain | Local |
|
||||
|-------------------|------------------------------------|---------------------|
|
||||
| Model | gemma3:27b | gemma3:1b |
|
||||
| Hardware | RunPod L40S 48GB | Apple Silicon (local Ollama) |
|
||||
| Endpoint | 8lfr3j47a5r3gn-11434.proxy.runpod.net | localhost:11434 |
|
||||
| Parameters | 27B | ~1B |
|
||||
| Status | **OFFLINE (HTTP 404)** | Operational |
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
The Big Brain RunPod L40S pod was **unreachable** during this benchmark session
|
||||
(HTTP 404 from Cloudflare proxy — pod likely terminated or not running). All four
|
||||
tasks were executed against the local gemma3:1b model only.
|
||||
|
||||
Despite being a 1B-parameter model, gemma3:1b produced responses for all tasks
|
||||
but with significant quality issues that a 27B model would be expected to avoid.
|
||||
|
||||
### Quality Assessment — Local gemma3:1b
|
||||
|
||||
| Task | Latency | Quality Score (1-5) | Verdict |
|
||||
|------|---------|---------------------|---------|
|
||||
| 1. Gitea webhook parser | 4,435 ms | 2/5 | **BROKEN** — fatal bugs in HMAC logic |
|
||||
| 2. Evennia architecture explainer | 1,823 ms | 2/5 | **SUPERFICIAL** — vague, inaccurate on key concepts |
|
||||
| 3. Fleet burn-down cron script | 11,039 ms | 2/5 | **NON-FUNCTIONAL** — uses fake commands, no RunPod API |
|
||||
| 4. Debug async Python bug | 6,610 ms | 2/5 | **WRONG DIAGNOSIS** — missed the real bugs |
|
||||
|
||||
---
|
||||
|
||||
## Task-by-Task Results
|
||||
|
||||
### Task 1: Python function to parse and validate a Gitea webhook payload
|
||||
|
||||
**Local (gemma3:1b) — 4,435 ms**
|
||||
|
||||
```python
|
||||
import json
|
||||
import hashlib
|
||||
|
||||
def validate_gitea_payload(payload, webhook_secret):
|
||||
"""
|
||||
Parses and validates a Gitea webhook payload.
|
||||
|
||||
Args:
|
||||
payload (dict): The JSON payload to validate.
|
||||
webhook_secret (str): The Gitea webhook secret.
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing:
|
||||
- bool: True if the payload is valid, False otherwise.
|
||||
- str: The parsed JSON payload if valid, None otherwise.
|
||||
"""
|
||||
try:
|
||||
data = json.loads(payload)
|
||||
if not isinstance(data, dict):
|
||||
return False, None
|
||||
|
||||
if 'ref' not in data or 'repository' not in data or 'commits' not in data:
|
||||
return False, None
|
||||
|
||||
# Verify X-Gitea-Signature header
|
||||
signature = data.get('X-Gitea-Signature', '')
|
||||
if not signature:
|
||||
return False, None
|
||||
|
||||
# Hash the signature
|
||||
calculated_hash = hashlib.sha256(signature.encode('utf-64')).hexdigest()
|
||||
|
||||
if calculated_hash != webhook_secret:
|
||||
return False, None
|
||||
|
||||
return True, data
|
||||
|
||||
except json.JSONDecodeError:
|
||||
return False, None
|
||||
except Exception as e:
|
||||
return False, None
|
||||
```
|
||||
|
||||
**Quality issues:**
|
||||
1. **FATAL:** Uses `hashlib.sha256` instead of `hmac.new()` with HMAC-SHA256 — the entire signature verification approach is wrong
|
||||
2. **FATAL:** `signature.encode('utf-64')` — no such encoding exists (would crash with LookupError)
|
||||
3. **FATAL:** Reads signature from payload body instead of HTTP header — architecturally wrong; webhook signatures are always in HTTP headers, never inside the JSON body
|
||||
4. The function accepts `payload` as a dict but calls `json.loads()` on it — type confusion
|
||||
5. Missing actual HMAC verification: should be `hmac.new(secret, payload_body, 'sha256').hexdigest()`
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly use `hmac.new(key, msg, hashlib.sha256)`, read signature from
|
||||
HTTP headers (not payload body), handle bytes vs string encoding properly, and include
|
||||
type hints with a proper dataclass result object.
|
||||
|
||||
---
|
||||
|
||||
### Task 2: Explain the architecture of the Evennia MUD framework in 200 words
|
||||
|
||||
**Local (gemma3:1b) — 1,823 ms**
|
||||
|
||||
> Evennia MUD utilizes a Django-based server model for core logic, providing a robust
|
||||
> and scalable foundation. The server is structured around three primary components: a
|
||||
> Session server, a Server, and a Portal. The Session server manages user sessions,
|
||||
> storing data like player states and inventory. The Server handles game logic – movement,
|
||||
> combat, and interactions – and the Portal acts as a central interface for players.
|
||||
>
|
||||
> Typeclasses are crucial, allowing for polymorphic behavior – functions can operate on
|
||||
> different player types without modification. This is achieved through a system of
|
||||
> inheritance and function signatures, enabling flexible game mechanics. The command
|
||||
> infrastructure relies on a robust command parser, allowing players to interact with the
|
||||
> game through a standardized set of commands.
|
||||
>
|
||||
> Scripting is handled through a specialized scripting language, allowing for complex game
|
||||
> rules and custom events. Players use a 'script' file format, defining actions and
|
||||
> conditions. A core scripting system utilizes a 'parse' function to interpret these files,
|
||||
> triggering events and updating the game state. All data is stored in a database, optimized
|
||||
> for efficient retrieval and manipulation.
|
||||
|
||||
**Quality issues:**
|
||||
1. **INACCURATE:** Describes "Session server" as a separate component — Evennia has Session objects managed within the Server, not a separate server process
|
||||
2. **INACCURATE:** Portal description is vague — the Portal specifically handles network connections and protocol negotiation (telnet, SSH, webclient), not just "a central interface"
|
||||
3. **WRONG:** Evennia does NOT have a "specialized scripting language" — it uses Python scripts stored in the database via EvScript/Script typeclasses
|
||||
4. **WRONG:** "Players use a 'script' file format" — this is fabricated; Evennia scripts are Python objects, not file-based
|
||||
5. **VAGUE:** Typeclass explanation is superficial — misses the key concept that they're Django proxy models providing persistent game object behavior
|
||||
6. Word count: ~185 words (close to target but imprecise)
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly describe Server/Portal architecture (Server=game logic+connections,
|
||||
Portal=network protocol layer), explain typeclasses as Django ContentTypes-based proxy models
|
||||
for Characters/Rooms/Exits/Scripts, detail CmdSet-based command routing, and describe
|
||||
Script DB objects (not a scripting language).
|
||||
|
||||
---
|
||||
|
||||
### Task 3: Write a cron job description for a fleet burn-down script
|
||||
|
||||
**Local (gemma3:1b) — 11,039 ms**
|
||||
|
||||
```bash
|
||||
burn_down_script() {
|
||||
# Log entry
|
||||
echo "$(date) - Fleet Burn-Down Started" | tee -a /var/log/fleet-burndown.log
|
||||
|
||||
# Iterate over all active RunPod pods
|
||||
for pod in $(podman pginfo --all --all | awk '{print $1}');
|
||||
do
|
||||
# Check GPU utilization
|
||||
gpu_util=$(podman pginfo --all --all | awk '{print $2}')
|
||||
|
||||
# Check if pod is idle for more than 30 minutes
|
||||
idle_time=300 # 30 minutes in seconds
|
||||
if [[ $(echo "$gpu_util > $idle_time" | bc) -eq 1 ]]; then
|
||||
# Terminate the pod
|
||||
podstop -d $pod
|
||||
fi
|
||||
done
|
||||
|
||||
# Send Slack notification
|
||||
echo "Fleet Burn-Down Complete." | send -t 10 "Fleet Burn-Down Complete."
|
||||
|
||||
echo "Script completed."
|
||||
}
|
||||
```
|
||||
|
||||
Cron entry: `*/5 * * * * /path/to/fleet_burndown.sh`
|
||||
|
||||
**Quality issues:**
|
||||
1. **FATAL:** Uses `podman pginfo` and `podstop` — these commands don't exist. Should use RunPod API (`curl` to `https://api.runpod.io/graphql`) or `runpodctl`
|
||||
2. **FATAL:** Idle check logic is nonsensical — compares GPU utilization percentage against 300 seconds; these are incomparable units
|
||||
3. **WRONG:** `idle_time=300` is 5 minutes, not 30 minutes (should be 1800)
|
||||
4. **WRONG:** `send -t 10` is not a standard Slack notification method — should use `curl` to Slack webhook URL
|
||||
5. **MISSING:** No actual RunPod API integration, no `RUNPOD_API_KEY` usage, no GPU utilization query
|
||||
6. No error handling, no exit codes, no structured logging
|
||||
7. The explanation text is verbose filler with self-evident comments
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would produce a script using RunPod GraphQL API to list pods, query GPU metrics,
|
||||
calculate idle time from last activity timestamp, use `curl` to terminate pods via API,
|
||||
and post to Slack via webhook. Would include proper error handling, `set -euo pipefail`,
|
||||
and structured JSON logging.
|
||||
|
||||
---
|
||||
|
||||
### Task 4: Debug a subtle Python async bug
|
||||
|
||||
**Buggy code provided:**
|
||||
```python
|
||||
import asyncio
|
||||
import aiohttp
|
||||
|
||||
async def fetch_all(urls):
|
||||
results = []
|
||||
for url in urls:
|
||||
async with aiohttp.ClientSession() as session:
|
||||
resp = await session.get(url)
|
||||
data = await resp.json()
|
||||
results.append(data)
|
||||
return results
|
||||
|
||||
async def main():
|
||||
urls = ["https://api.example.com/data1", "https://api.example.com/data2"]
|
||||
results = await fetch_all(urls)
|
||||
print(f"Got {len(results)} results")
|
||||
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
**Local (gemma3:1b) — 6,610 ms**
|
||||
|
||||
The model identified these "bugs":
|
||||
1. "Missing `return` statement in `fetch_all`" — **WRONG**, the code clearly has `return results`
|
||||
2. "No error handling for `resp.json()`" — This is a valid improvement suggestion but not a bug
|
||||
3. "No `await` keyword in `main()`" — **WRONG**, `main()` is correctly `async def` and `asyncio.run()` is the entry point
|
||||
|
||||
The model's "fix" only added a try/except wrapper and kept all the real bugs.
|
||||
|
||||
**Actual bugs in the provided code:**
|
||||
1. **Real Bug:** Creates a new `ClientSession` for every URL instead of reusing one session — wasteful, defeats connection pooling, and can cause resource leaks at scale
|
||||
2. **Real Bug:** No `resp.raise_for_status()` — silently accepts HTTP error responses (404, 500) and tries to parse them as JSON
|
||||
3. **Anti-pattern:** Sequential `await` in loop instead of `asyncio.gather()` for concurrent fetching — no parallelism despite using async
|
||||
4. **Missing:** No timeout on `session.get()` — could hang forever
|
||||
5. **Missing:** No error handling at all (the model's suggestion to add try/except was directionally right but missed the real bugs)
|
||||
|
||||
**Big Brain (gemma3:27b) — UNAVAILABLE**
|
||||
|
||||
Expected: Would correctly identify session reuse issue, lack of `raise_for_status()`,
|
||||
sequential vs concurrent fetching, and provide a proper fix using `asyncio.gather()` with
|
||||
a single shared session and timeout/deadline handling.
|
||||
|
||||
---
|
||||
|
||||
## Comparison Table
|
||||
|
||||
| Task | Local 1B (gemma3:1b) | Big Brain 27B (gemma3:27b) | Winner |
|
||||
|------|---------------------|---------------------------|--------|
|
||||
| 1. Gitea webhook parser | BROKEN — wrong HMAC, wrong encoding, wrong signature source | UNAVAILABLE (pod offline) | N/A |
|
||||
| 2. Evennia architecture | SUPERFICIAL — vague, fabricated scripting language | UNAVAILABLE (pod offline) | N/A |
|
||||
| 3. Fleet burn-down cron | NON-FUNCTIONAL — fake commands, unit mismatch | UNAVAILABLE (pod offline) | N/A |
|
||||
| 4. Debug async bug | WRONG DIAGNOSIS — missed all real bugs | UNAVAILABLE (pod offline) | N/A |
|
||||
|
||||
---
|
||||
|
||||
## Latency Summary
|
||||
|
||||
| Task | Local gemma3:1b |
|
||||
|------|-----------------|
|
||||
| 1. Gitea webhook parser | 4,435 ms |
|
||||
| 2. Evennia architecture | 1,823 ms |
|
||||
| 3. Fleet burn-down cron | 11,039 ms |
|
||||
| 4. Debug async bug | 6,610 ms |
|
||||
| **Total** | **23,907 ms** |
|
||||
|
||||
Big Brain latency: N/A (pod offline)
|
||||
|
||||
---
|
||||
|
||||
## Key Finding
|
||||
|
||||
**The 1B model fails all four tasks in ways that would be immediately obvious to a developer.**
|
||||
The failures fall into categories that large models reliably avoid:
|
||||
|
||||
- **Hallucinated APIs** (Task 3: `podman pginfo`, `podstop` don't exist)
|
||||
- **Fundamental misunderstanding of security primitives** (Task 1: SHA-256 instead of HMAC, `utf-64` encoding)
|
||||
- **Fabricated technical details** (Task 2: "specialized scripting language" in Evennia)
|
||||
- **Wrong diagnosis of provided code** (Task 4: claimed bugs that don't exist, missed real bugs)
|
||||
|
||||
This benchmark demonstrates that even without Big Brain results, the quality gap between
|
||||
1B and 27B models is expected to be substantial for technical/code generation tasks.
|
||||
|
||||
---
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. **Restart Big Brain pod** — RunPod pod 8lfr3j47a5r3gn is returning HTTP 404
|
||||
2. **Re-run benchmark** with both models online to populate the comparison table
|
||||
3. Consider testing with gemma3:4b (if available) as a middle-ground comparison
|
||||
4. Run Big Brain at `temperature: 0.3` for consistency with local results
|
||||
|
||||
---
|
||||
|
||||
*Generated by Ezra (Hermes Agent) — Issue #576 — 2026-04-14*
|
||||
Reference in New Issue
Block a user