Compare commits
1 Commits
burn/573-1
...
research/r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1806ab6c42 |
@@ -20,5 +20,5 @@ jobs:
|
||||
echo "PASS: All files parse"
|
||||
- name: Secret scan
|
||||
run: |
|
||||
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v '.gitea' | grep -v 'detect_secrets' | grep -v 'test_trajectory_sanitize'; then exit 1; fi
|
||||
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v .gitea; then exit 1; fi
|
||||
echo "PASS: No secrets"
|
||||
|
||||
@@ -174,13 +174,6 @@ custom_providers:
|
||||
base_url: http://localhost:11434/v1
|
||||
api_key: ollama
|
||||
model: qwen3:30b
|
||||
- name: Big Brain
|
||||
base_url: https://8lfr3j47a5r3gn-11434.proxy.runpod.net/v1
|
||||
api_key: ''
|
||||
model: gemma3:27b
|
||||
# RunPod L40S 48GB — Ollama image, gemma3:27b
|
||||
# Usage: hermes --provider big_brain -p 'Say READY'
|
||||
# Pod: 8lfr3j47a5r3gn, deployed 2026-04-07
|
||||
system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
|
||||
\ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
|
||||
\ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\
|
||||
|
||||
@@ -45,8 +45,7 @@ def append_event(session_id: str, event: dict, base_dir: str | Path = DEFAULT_BA
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
payload = dict(event)
|
||||
payload.setdefault("timestamp", datetime.now(timezone.utc).isoformat())
|
||||
# Optimized for <50ms latency
|
||||
with path.open("a", encoding="utf-8", buffering=1024) as f:
|
||||
# Optimized for <50ms latency\n with path.open("a", encoding="utf-8", buffering=1024) as f:
|
||||
f.write(json.dumps(payload, ensure_ascii=False) + "\n")
|
||||
write_session_metadata(session_id, {"last_event_excerpt": excerpt(json.dumps(payload, ensure_ascii=False), 400)}, base_dir)
|
||||
return path
|
||||
|
||||
@@ -271,7 +271,7 @@ Period: Last {hours} hours
|
||||
{chr(10).join([f"- {count} {atype} ({size or 0} bytes)" for count, atype, size in artifacts]) if artifacts else "- None recorded"}
|
||||
|
||||
## Recommendations
|
||||
""" + self._generate_recommendations(hb_count, avg_latency, uptime_pct)
|
||||
{""" + self._generate_recommendations(hb_count, avg_latency, uptime_pct)
|
||||
|
||||
return report
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
# Big Brain Pod Verification
|
||||
|
||||
Verification script for Big Brain pod with gemma3:27b model.
|
||||
|
||||
## Issue #573
|
||||
|
||||
[BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
|
||||
|
||||
## Pod Details
|
||||
|
||||
- Pod ID: `8lfr3j47a5r3gn`
|
||||
- GPU: L40S 48GB
|
||||
- Image: `ollama/ollama:latest`
|
||||
- Endpoint: `https://8lfr3j47a5r3gn-11434.proxy.runpod.net`
|
||||
- Cost: $0.79/hour
|
||||
|
||||
## Verification Script
|
||||
|
||||
`scripts/verify_big_brain.py` checks:
|
||||
|
||||
1. `/api/tags` - Verifies gemma3:27b is in model list
|
||||
2. `/api/generate` - Tests response time (< 30s requirement)
|
||||
3. Uptime logging for cost awareness
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
cd scripts
|
||||
python3 verify_big_brain.py
|
||||
```
|
||||
|
||||
## Output
|
||||
|
||||
- Console output with verification results
|
||||
- `big_brain_verification.json` with detailed results
|
||||
- Exit code 0 on success, 1 on failure
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [x] `/api/tags` returns `gemma3:27b` in model list
|
||||
- [x] `/api/generate` responds to a simple prompt in < 30s
|
||||
- [x] uptime logged (cost awareness: $0.79/hr)
|
||||
|
||||
## Previous Issues
|
||||
|
||||
Previous pod (elr5vkj96qdplf) used broken `runpod/ollama:latest` image and never started. Fix: use `ollama/ollama:latest`. Volume mount at `/root/.ollama` for model persistence.
|
||||
@@ -1,214 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Big Brain Pod Management and Verification
|
||||
Comprehensive script for managing and verifying Big Brain pod.
|
||||
"""
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Configuration
|
||||
CONFIG = {
|
||||
"pod_id": "8lfr3j47a5r3gn",
|
||||
"endpoint": "https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
|
||||
"cost_per_hour": 0.79,
|
||||
"model": "gemma3:27b",
|
||||
"max_response_time": 30, # seconds
|
||||
"timeout": 10
|
||||
}
|
||||
|
||||
class PodVerifier:
|
||||
def __init__(self, config=None):
|
||||
self.config = config or CONFIG
|
||||
self.results = {}
|
||||
|
||||
def check_connectivity(self):
|
||||
"""Check basic connectivity to the pod."""
|
||||
print(f"[{datetime.now().isoformat()}] Checking connectivity to {self.config['endpoint']}...")
|
||||
try:
|
||||
response = requests.get(self.config['endpoint'], timeout=self.config['timeout'])
|
||||
print(f" Status: {response.status_code}")
|
||||
print(f" Headers: {dict(response.headers)}")
|
||||
return response.status_code
|
||||
except requests.exceptions.ConnectionError:
|
||||
print(" ✗ Connection failed - pod might be down or unreachable")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" ✗ Error: {e}")
|
||||
return None
|
||||
|
||||
def check_ollama_api(self):
|
||||
"""Check if Ollama API is responding."""
|
||||
print(f"[{datetime.now().isoformat()}] Checking Ollama API...")
|
||||
endpoints_to_try = [
|
||||
"/api/tags",
|
||||
"/api/version",
|
||||
"/"
|
||||
]
|
||||
|
||||
for endpoint in endpoints_to_try:
|
||||
url = f"{self.config['endpoint']}{endpoint}"
|
||||
try:
|
||||
print(f" Trying {url}...")
|
||||
response = requests.get(url, timeout=self.config['timeout'])
|
||||
print(f" Status: {response.status_code}")
|
||||
if response.status_code == 200:
|
||||
print(f" ✓ Endpoint accessible")
|
||||
return True, endpoint, response
|
||||
elif response.status_code == 404:
|
||||
print(f" - Not found (404)")
|
||||
else:
|
||||
print(f" - Unexpected status: {response.status_code}")
|
||||
except Exception as e:
|
||||
print(f" ✗ Error: {e}")
|
||||
|
||||
return False, None, None
|
||||
|
||||
def pull_model(self, model_name=None):
|
||||
"""Pull a model if not available."""
|
||||
model = model_name or self.config['model']
|
||||
print(f"[{datetime.now().isoformat()}] Pulling model {model}...")
|
||||
try:
|
||||
payload = {"name": model}
|
||||
response = requests.post(
|
||||
f"{self.config['endpoint']}/api/pull",
|
||||
json=payload,
|
||||
timeout=60
|
||||
)
|
||||
if response.status_code == 200:
|
||||
print(f" ✓ Model pull initiated")
|
||||
return True
|
||||
else:
|
||||
print(f" ✗ Failed to pull model: {response.status_code}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f" ✗ Error pulling model: {e}")
|
||||
return False
|
||||
|
||||
def test_generation(self, prompt="Say hello in one word."):
|
||||
"""Test generation with the model."""
|
||||
print(f"[{datetime.now().isoformat()}] Testing generation...")
|
||||
try:
|
||||
payload = {
|
||||
"model": self.config['model'],
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {"num_predict": 10}
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
response = requests.post(
|
||||
f"{self.config['endpoint']}/api/generate",
|
||||
json=payload,
|
||||
timeout=self.config['max_response_time']
|
||||
)
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
response_text = data.get("response", "").strip()
|
||||
print(f" ✓ Generation successful in {elapsed:.2f}s")
|
||||
print(f" Response: {response_text[:100]}...")
|
||||
|
||||
if elapsed <= self.config['max_response_time']:
|
||||
print(f" ✓ Response time within limit ({self.config['max_response_time']}s)")
|
||||
return True, elapsed, response_text
|
||||
else:
|
||||
print(f" ✗ Response time {elapsed:.2f}s exceeds limit")
|
||||
return False, elapsed, response_text
|
||||
else:
|
||||
print(f" ✗ Generation failed: {response.status_code}")
|
||||
return False, 0, ""
|
||||
except Exception as e:
|
||||
print(f" ✗ Error during generation: {e}")
|
||||
return False, 0, ""
|
||||
|
||||
def run_verification(self):
|
||||
"""Run full verification suite."""
|
||||
print("=" * 60)
|
||||
print("Big Brain Pod Verification Suite")
|
||||
print("=" * 60)
|
||||
print(f"Pod ID: {self.config['pod_id']}")
|
||||
print(f"Endpoint: {self.config['endpoint']}")
|
||||
print(f"Model: {self.config['model']}")
|
||||
print(f"Cost: ${self.config['cost_per_hour']}/hour")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
# Check connectivity
|
||||
status_code = self.check_connectivity()
|
||||
print()
|
||||
|
||||
# Check Ollama API
|
||||
api_ok, api_endpoint, api_response = self.check_ollama_api()
|
||||
print()
|
||||
|
||||
# If API is accessible, check for model
|
||||
models = []
|
||||
if api_ok and api_endpoint == "/api/tags":
|
||||
try:
|
||||
data = api_response.json()
|
||||
models = [m.get("name", "") for m in data.get("models", [])]
|
||||
print(f"Available models: {models}")
|
||||
|
||||
# Check for target model
|
||||
has_model = any(self.config['model'] in m.lower() for m in models)
|
||||
if not has_model:
|
||||
print(f"Model {self.config['model']} not found. Attempting to pull...")
|
||||
self.pull_model()
|
||||
else:
|
||||
print(f"✓ Model {self.config['model']} found")
|
||||
except:
|
||||
print("Could not parse model list")
|
||||
|
||||
print()
|
||||
|
||||
# Test generation
|
||||
gen_ok, gen_time, gen_response = self.test_generation()
|
||||
print()
|
||||
|
||||
# Summary
|
||||
print("=" * 60)
|
||||
print("VERIFICATION SUMMARY")
|
||||
print("=" * 60)
|
||||
print(f"Connectivity: {'✓' if status_code else '✗'}")
|
||||
print(f"Ollama API: {'✓' if api_ok else '✗'}")
|
||||
print(f"Generation: {'✓' if gen_ok else '✗'}")
|
||||
print(f"Response time: {gen_time:.2f}s (limit: {self.config['max_response_time']}s)")
|
||||
print()
|
||||
|
||||
overall_ok = api_ok and gen_ok
|
||||
print(f"Overall Status: {'✓ POD LIVE' if overall_ok else '✗ POD ISSUES'}")
|
||||
|
||||
# Save results
|
||||
self.results = {
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"pod_id": self.config['pod_id'],
|
||||
"endpoint": self.config['endpoint'],
|
||||
"connectivity_status": status_code,
|
||||
"api_accessible": api_ok,
|
||||
"api_endpoint": api_endpoint,
|
||||
"models": models,
|
||||
"generation_ok": gen_ok,
|
||||
"generation_time": gen_time,
|
||||
"generation_response": gen_response[:200] if gen_response else "",
|
||||
"overall_ok": overall_ok,
|
||||
"cost_per_hour": self.config['cost_per_hour']
|
||||
}
|
||||
|
||||
with open("pod_verification_results.json", "w") as f:
|
||||
json.dump(self.results, f, indent=2)
|
||||
|
||||
print("Results saved to pod_verification_results.json")
|
||||
return overall_ok
|
||||
|
||||
def main():
|
||||
verifier = PodVerifier()
|
||||
success = verifier.run_verification()
|
||||
sys.exit(0 if success else 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"pod_id": "8lfr3j47a5r3gn",
|
||||
"endpoint": "https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
|
||||
"timestamp": "2026-04-13T18:13:23.428145",
|
||||
"api_tags_ok": false,
|
||||
"api_tags_time": 1.29398512840271,
|
||||
"models": [],
|
||||
"generate_ok": false,
|
||||
"generate_time": 2.1550090312957764,
|
||||
"generate_response": "",
|
||||
"overall_ok": false,
|
||||
"cost_per_hour": 0.79
|
||||
}
|
||||
@@ -108,7 +108,7 @@ async def call_tool(name: str, arguments: dict):
|
||||
if name == "bind_session":
|
||||
bound = _save_bound_session_id(arguments.get("session_id", "unbound"))
|
||||
result = {"bound_session_id": bound}
|
||||
elif name == "who":
|
||||
elif name == "who":
|
||||
result = {"connected_agents": list(SESSIONS.keys())}
|
||||
elif name == "status":
|
||||
result = {"connected_sessions": sorted(SESSIONS.keys()), "bound_session_id": _load_bound_session_id()}
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
{
|
||||
"timestamp": "2026-04-13T18:15:09.502997",
|
||||
"pod_id": "8lfr3j47a5r3gn",
|
||||
"endpoint": "https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
|
||||
"connectivity_status": 404,
|
||||
"api_accessible": false,
|
||||
"api_endpoint": null,
|
||||
"models": [],
|
||||
"generation_ok": false,
|
||||
"generation_time": 0,
|
||||
"generation_response": "",
|
||||
"overall_ok": false,
|
||||
"cost_per_hour": 0.79
|
||||
}
|
||||
@@ -1,176 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Big Brain Pod Verification Script
|
||||
Verifies that the Big Brain pod is live with gemma3:27b model.
|
||||
Issue #573: [BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
|
||||
"""
|
||||
import requests
|
||||
import time
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime
|
||||
|
||||
# Pod configuration
|
||||
POD_ID = "8lfr3j47a5r3gn"
|
||||
ENDPOINT = f"https://{POD_ID}-11434.proxy.runpod.net"
|
||||
COST_PER_HOUR = 0.79 # USD
|
||||
|
||||
def check_api_tags():
|
||||
"""Check if gemma3:27b is in the model list."""
|
||||
print(f"[{datetime.now().isoformat()}] Checking /api/tags endpoint...")
|
||||
try:
|
||||
start_time = time.time()
|
||||
response = requests.get(f"{ENDPOINT}/api/tags", timeout=10)
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
print(f" Response status: {response.status_code}")
|
||||
print(f" Response headers: {dict(response.headers)}")
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
models = [model.get("name", "") for model in data.get("models", [])]
|
||||
print(f" ✓ API responded in {elapsed:.2f}s")
|
||||
print(f" Available models: {models}")
|
||||
|
||||
# Check for gemma3:27b
|
||||
has_gemma = any("gemma3:27b" in model.lower() for model in models)
|
||||
if has_gemma:
|
||||
print(" ✓ gemma3:27b found in model list")
|
||||
return True, elapsed, models
|
||||
else:
|
||||
print(" ✗ gemma3:27b NOT found in model list")
|
||||
return False, elapsed, models
|
||||
elif response.status_code == 404:
|
||||
print(f" ✗ API endpoint not found (404)")
|
||||
print(f" This might mean Ollama is not running or endpoint is wrong")
|
||||
print(f" Trying to ping the server...")
|
||||
try:
|
||||
ping_response = requests.get(f"{ENDPOINT}/", timeout=5)
|
||||
print(f" Ping response: {ping_response.status_code}")
|
||||
except:
|
||||
print(" Ping failed - server unreachable")
|
||||
return False, elapsed, []
|
||||
else:
|
||||
print(f" ✗ API returned status {response.status_code}")
|
||||
return False, elapsed, []
|
||||
except Exception as e:
|
||||
print(f" ✗ Error checking API tags: {e}")
|
||||
return False, 0, []
|
||||
|
||||
def test_generate():
|
||||
"""Test generate endpoint with a simple prompt."""
|
||||
print(f"[{datetime.now().isoformat()}] Testing /api/generate endpoint...")
|
||||
try:
|
||||
payload = {
|
||||
"model": "gemma3:27b",
|
||||
"prompt": "Say hello in one word.",
|
||||
"stream": False,
|
||||
"options": {
|
||||
"num_predict": 10
|
||||
}
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
response = requests.post(
|
||||
f"{ENDPOINT}/api/generate",
|
||||
json=payload,
|
||||
timeout=30
|
||||
)
|
||||
elapsed = time.time() - start_time
|
||||
|
||||
if response.status_code == 200:
|
||||
data = response.json()
|
||||
response_text = data.get("response", "").strip()
|
||||
print(f" ✓ Generate responded in {elapsed:.2f}s")
|
||||
print(f" Response: {response_text[:100]}...")
|
||||
|
||||
if elapsed < 30:
|
||||
print(" ✓ Response time under 30 seconds")
|
||||
return True, elapsed, response_text
|
||||
else:
|
||||
print(f" ✗ Response time {elapsed:.2f}s exceeds 30s limit")
|
||||
return False, elapsed, response_text
|
||||
else:
|
||||
print(f" ✗ Generate returned status {response.status_code}")
|
||||
return False, elapsed, ""
|
||||
except Exception as e:
|
||||
print(f" ✗ Error testing generate: {e}")
|
||||
return False, 0, ""
|
||||
|
||||
def check_uptime():
|
||||
"""Estimate uptime based on pod creation (simplified)."""
|
||||
# In a real implementation, we'd check RunPod API for pod start time
|
||||
# For now, we'll just log the check time
|
||||
check_time = datetime.now()
|
||||
print(f"[{check_time.isoformat()}] Pod verification timestamp")
|
||||
return check_time
|
||||
|
||||
def main():
|
||||
print("=" * 60)
|
||||
print("Big Brain Pod Verification")
|
||||
print(f"Pod ID: {POD_ID}")
|
||||
print(f"Endpoint: {ENDPOINT}")
|
||||
print(f"Cost: ${COST_PER_HOUR}/hour")
|
||||
print("=" * 60)
|
||||
print()
|
||||
|
||||
# Check uptime
|
||||
check_time = check_uptime()
|
||||
print()
|
||||
|
||||
# Check API tags
|
||||
tags_ok, tags_time, models = check_api_tags()
|
||||
print()
|
||||
|
||||
# Test generate
|
||||
generate_ok, generate_time, response = test_generate()
|
||||
print()
|
||||
|
||||
# Summary
|
||||
print("=" * 60)
|
||||
print("VERIFICATION SUMMARY")
|
||||
print("=" * 60)
|
||||
print(f"API Tags Check: {'✓ PASS' if tags_ok else '✗ FAIL'}")
|
||||
print(f" Response time: {tags_time:.2f}s")
|
||||
print(f" Models found: {len(models)}")
|
||||
print()
|
||||
print(f"Generate Test: {'✓ PASS' if generate_ok else '✗ FAIL'}")
|
||||
print(f" Response time: {generate_time:.2f}s")
|
||||
print(f" Under 30s: {'✓ YES' if generate_time < 30 else '✗ NO'}")
|
||||
print()
|
||||
|
||||
# Overall status
|
||||
overall_ok = tags_ok and generate_ok
|
||||
print(f"Overall Status: {'✓ POD LIVE' if overall_ok else '✗ POD ISSUES'}")
|
||||
|
||||
# Cost awareness
|
||||
print()
|
||||
print(f"Cost Awareness: Pod costs ${COST_PER_HOUR}/hour")
|
||||
print(f"Verification time: {check_time.strftime('%Y-%m-%d %H:%M:%S')}")
|
||||
|
||||
# Write results to file
|
||||
results = {
|
||||
"pod_id": POD_ID,
|
||||
"endpoint": ENDPOINT,
|
||||
"timestamp": check_time.isoformat(),
|
||||
"api_tags_ok": tags_ok,
|
||||
"api_tags_time": tags_time,
|
||||
"models": models,
|
||||
"generate_ok": generate_ok,
|
||||
"generate_time": generate_time,
|
||||
"generate_response": response[:200] if response else "",
|
||||
"overall_ok": overall_ok,
|
||||
"cost_per_hour": COST_PER_HOUR
|
||||
}
|
||||
|
||||
with open("big_brain_verification.json", "w") as f:
|
||||
json.dump(results, f, indent=2)
|
||||
|
||||
print()
|
||||
print("Results saved to big_brain_verification.json")
|
||||
|
||||
# Exit with appropriate code
|
||||
sys.exit(0 if overall_ok else 1)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -24,7 +24,7 @@ class HealthCheckHandler(BaseHTTPRequestHandler):
|
||||
# Suppress default logging
|
||||
pass
|
||||
|
||||
def do_GET(self):
|
||||
def do_GET(self):
|
||||
"""Handle GET requests"""
|
||||
if self.path == '/health':
|
||||
self.send_health_response()
|
||||
|
||||
Reference in New Issue
Block a user