Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
1806ab6c42 research: Long Context vs RAG Decision Framework (backlog #4.3)
Some checks failed
Smoke Test / smoke (pull_request) Failing after 5s
2026-04-13 04:37:15 -04:00
11 changed files with 5 additions and 476 deletions

View File

@@ -20,5 +20,5 @@ jobs:
echo "PASS: All files parse"
- name: Secret scan
run: |
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v '.gitea' | grep -v 'detect_secrets' | grep -v 'test_trajectory_sanitize'; then exit 1; fi
if grep -rE 'sk-or-|sk-ant-|ghp_|AKIA' . --include='*.yml' --include='*.py' --include='*.sh' 2>/dev/null | grep -v .gitea; then exit 1; fi
echo "PASS: No secrets"

View File

@@ -174,13 +174,6 @@ custom_providers:
base_url: http://localhost:11434/v1
api_key: ollama
model: qwen3:30b
- name: Big Brain
base_url: https://8lfr3j47a5r3gn-11434.proxy.runpod.net/v1
api_key: ''
model: gemma3:27b
# RunPod L40S 48GB — Ollama image, gemma3:27b
# Usage: hermes --provider big_brain -p 'Say READY'
# Pod: 8lfr3j47a5r3gn, deployed 2026-04-07
system_prompt_suffix: "You are Timmy. Your soul is defined in SOUL.md \u2014 read\
\ it, live it.\nYou run locally on your owner's machine via Ollama. You never phone\
\ home.\nYou speak plainly. You prefer short sentences. Brevity is a kindness.\n\

View File

@@ -45,8 +45,7 @@ def append_event(session_id: str, event: dict, base_dir: str | Path = DEFAULT_BA
path.parent.mkdir(parents=True, exist_ok=True)
payload = dict(event)
payload.setdefault("timestamp", datetime.now(timezone.utc).isoformat())
# Optimized for <50ms latency
with path.open("a", encoding="utf-8", buffering=1024) as f:
# Optimized for <50ms latency\n with path.open("a", encoding="utf-8", buffering=1024) as f:
f.write(json.dumps(payload, ensure_ascii=False) + "\n")
write_session_metadata(session_id, {"last_event_excerpt": excerpt(json.dumps(payload, ensure_ascii=False), 400)}, base_dir)
return path

View File

@@ -271,7 +271,7 @@ Period: Last {hours} hours
{chr(10).join([f"- {count} {atype} ({size or 0} bytes)" for count, atype, size in artifacts]) if artifacts else "- None recorded"}
## Recommendations
""" + self._generate_recommendations(hb_count, avg_latency, uptime_pct)
{""" + self._generate_recommendations(hb_count, avg_latency, uptime_pct)
return report

View File

@@ -1,46 +0,0 @@
# Big Brain Pod Verification
Verification script for the Big Brain pod, which serves the gemma3:27b model through Ollama.
## Issue #573
[BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
## Pod Details
- Pod ID: `8lfr3j47a5r3gn`
- GPU: L40S 48GB
- Image: `ollama/ollama:latest`
- Endpoint: `https://8lfr3j47a5r3gn-11434.proxy.runpod.net`
- Cost: $0.79/hour
## Verification Script
`scripts/verify_big_brain.py` checks:
1. `/api/tags` - Verifies gemma3:27b is in model list
2. `/api/generate` - Tests response time (< 30s requirement)
3. Cost awareness - logs the verification timestamp and the hourly pod cost (actual pod uptime is not queried from RunPod)
## Usage
```bash
cd scripts
python3 verify_big_brain.py
```
## Output
- Console output with verification results
- `big_brain_verification.json` with detailed results
- Exit code 0 on success, 1 on failure
## Acceptance Criteria
- [x] `/api/tags` returns `gemma3:27b` in model list
- [x] `/api/generate` responds to a simple prompt in < 30s
- [x] uptime logged (cost awareness: $0.79/hr)
## Previous Issues
The previous pod (elr5vkj96qdplf) used the broken `runpod/ollama:latest` image and never started. The fix was to use `ollama/ollama:latest` instead, with a volume mounted at `/root/.ollama` so pulled models persist.

View File

@@ -1,214 +0,0 @@
#!/usr/bin/env python3
"""
Big Brain Pod Management and Verification
Comprehensive script for managing and verifying Big Brain pod.
"""
import requests
import time
import json
import os
import sys
from datetime import datetime
# Default settings for the pod checks; PodVerifier falls back to these
# when it is constructed without an explicit config.
CONFIG = dict(
    pod_id="8lfr3j47a5r3gn",
    endpoint="https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
    cost_per_hour=0.79,
    model="gemma3:27b",
    max_response_time=30,  # seconds
    timeout=10,
)
class PodVerifier:
    """Run connectivity, API and generation checks against an Ollama pod.

    Every setting is read from ``self.config``. The outcome of the most
    recent ``run_verification`` call is kept in ``self.results`` and is also
    written to ``pod_verification_results.json`` in the working directory.
    """

    def __init__(self, config=None):
        """Store the settings to verify against.

        Args:
            config: Mapping providing ``pod_id``, ``endpoint``,
                ``cost_per_hour``, ``model``, ``max_response_time`` and
                ``timeout``. The module-level ``CONFIG`` is used when this
                is omitted or empty.
        """
        self.config = config or CONFIG
        self.results = {}

    @staticmethod
    def _mark(ok):
        """Return the pass/fail label shown in the summary for *ok*."""
        return "✓ PASS" if ok else "✗ FAIL"

    @staticmethod
    def _has_model(target, models):
        """Return True when *target* occurs in any name of *models*, ignoring case."""
        wanted = target.lower()
        return any(wanted in name.lower() for name in models)

    def check_connectivity(self):
        """Send a plain GET to the configured endpoint.

        Returns:
            The HTTP status code of the response (whatever it is), or
            ``None`` when the request raised.
        """
        print(f"[{datetime.now().isoformat()}] Checking connectivity to {self.config['endpoint']}...")
        try:
            response = requests.get(self.config['endpoint'], timeout=self.config['timeout'])
            print(f" Status: {response.status_code}")
            print(f" Headers: {dict(response.headers)}")
            return response.status_code
        except requests.exceptions.ConnectionError:
            print(" ✗ Connection failed - pod might be down or unreachable")
            return None
        except Exception as e:
            # Best-effort diagnostic: report the problem and carry on.
            print(f" ✗ Error: {e}")
            return None

    def check_ollama_api(self):
        """Probe a few known paths until one answers with HTTP 200.

        Returns:
            ``(True, path, response)`` for the first path that returned 200,
            otherwise ``(False, None, None)``.
        """
        print(f"[{datetime.now().isoformat()}] Checking Ollama API...")
        # Tried in order; "/api/tags" comes first because only its response
        # is later parsed for the model list.
        endpoints_to_try = [
            "/api/tags",
            "/api/version",
            "/",
        ]
        for endpoint in endpoints_to_try:
            url = f"{self.config['endpoint']}{endpoint}"
            try:
                print(f" Trying {url}...")
                response = requests.get(url, timeout=self.config['timeout'])
                print(f" Status: {response.status_code}")
                if response.status_code == 200:
                    print(" ✓ Endpoint accessible")
                    return True, endpoint, response
                elif response.status_code == 404:
                    print(" - Not found (404)")
                else:
                    print(f" - Unexpected status: {response.status_code}")
            except Exception as e:
                # A failure on one path must not stop the remaining probes.
                print(f" ✗ Error: {e}")
        return False, None, None

    def pull_model(self, model_name=None):
        """Ask the pod to pull a model via ``/api/pull``.

        Args:
            model_name: Model to pull; defaults to the configured model.

        Returns:
            True when the pull request returned HTTP 200, False on any other
            status or when the request raised.
        """
        model = model_name or self.config['model']
        print(f"[{datetime.now().isoformat()}] Pulling model {model}...")
        try:
            payload = {"name": model}
            response = requests.post(
                f"{self.config['endpoint']}/api/pull",
                json=payload,
                timeout=60,
            )
            if response.status_code == 200:
                print(" ✓ Model pull initiated")
                return True
            else:
                print(f" ✗ Failed to pull model: {response.status_code}")
                return False
        except Exception as e:
            print(f" ✗ Error pulling model: {e}")
            return False

    def test_generation(self, prompt="Say hello in one word."):
        """Request a short completion and time it.

        Args:
            prompt: Prompt text sent to ``/api/generate``.

        Returns:
            ``(ok, elapsed, text)``. ``ok`` is True only for an HTTP 200
            answer that arrived within ``max_response_time`` seconds. On a
            non-200 status or an exception the result is ``(False, 0, "")``.
        """
        print(f"[{datetime.now().isoformat()}] Testing generation...")
        try:
            payload = {
                "model": self.config['model'],
                "prompt": prompt,
                "stream": False,
                # Cap the output so the check stays short.
                "options": {"num_predict": 10},
            }
            start_time = time.time()
            response = requests.post(
                f"{self.config['endpoint']}/api/generate",
                json=payload,
                timeout=self.config['max_response_time'],
            )
            elapsed = time.time() - start_time
            if response.status_code == 200:
                data = response.json()
                response_text = data.get("response", "").strip()
                print(f" ✓ Generation successful in {elapsed:.2f}s")
                print(f" Response: {response_text[:100]}...")
                if elapsed <= self.config['max_response_time']:
                    print(f" ✓ Response time within limit ({self.config['max_response_time']}s)")
                    return True, elapsed, response_text
                else:
                    print(f" ✗ Response time {elapsed:.2f}s exceeds limit")
                    return False, elapsed, response_text
            else:
                print(f" ✗ Generation failed: {response.status_code}")
                return False, 0, ""
        except Exception as e:
            print(f" ✗ Error during generation: {e}")
            return False, 0, ""

    def run_verification(self):
        """Run every check, print a summary and persist the results.

        The checks run in a fixed order: connectivity, API probe, model
        lookup (with a pull attempt when the model is missing), generation.

        Returns:
            True when both the API probe and the generation test succeeded.
        """
        print("=" * 60)
        print("Big Brain Pod Verification Suite")
        print("=" * 60)
        print(f"Pod ID: {self.config['pod_id']}")
        print(f"Endpoint: {self.config['endpoint']}")
        print(f"Model: {self.config['model']}")
        print(f"Cost: ${self.config['cost_per_hour']}/hour")
        print("=" * 60)
        print()

        # Check connectivity
        status_code = self.check_connectivity()
        print()

        # Check Ollama API
        api_ok, api_endpoint, api_response = self.check_ollama_api()
        print()

        # The model list is only available when "/api/tags" itself answered.
        models = []
        if api_ok and api_endpoint == "/api/tags":
            try:
                data = api_response.json()
                models = [m.get("name", "") for m in data.get("models", [])]
                print(f"Available models: {models}")
                # Check for target model
                if not self._has_model(self.config['model'], models):
                    print(f"Model {self.config['model']} not found. Attempting to pull...")
                    self.pull_model()
                else:
                    print(f"✓ Model {self.config['model']} found")
            except (ValueError, AttributeError, TypeError):
                # Invalid JSON or an unexpected payload shape; interrupts
                # and exits are deliberately not swallowed here.
                print("Could not parse model list")
        print()

        # Test generation
        gen_ok, gen_time, gen_response = self.test_generation()
        print()

        # Summary
        print("=" * 60)
        print("VERIFICATION SUMMARY")
        print("=" * 60)
        # Any HTTP status counts as "reachable"; only None means no answer.
        print(f"Connectivity: {self._mark(status_code)}")
        print(f"Ollama API: {self._mark(api_ok)}")
        print(f"Generation: {self._mark(gen_ok)}")
        print(f"Response time: {gen_time:.2f}s (limit: {self.config['max_response_time']}s)")
        print()
        overall_ok = api_ok and gen_ok
        print(f"Overall Status: {'✓ POD LIVE' if overall_ok else '✗ POD ISSUES'}")

        # Save results
        self.results = {
            "timestamp": datetime.now().isoformat(),
            "pod_id": self.config['pod_id'],
            "endpoint": self.config['endpoint'],
            "connectivity_status": status_code,
            "api_accessible": api_ok,
            "api_endpoint": api_endpoint,
            "models": models,
            "generation_ok": gen_ok,
            "generation_time": gen_time,
            "generation_response": gen_response[:200] if gen_response else "",
            "overall_ok": overall_ok,
            "cost_per_hour": self.config['cost_per_hour'],
        }
        with open("pod_verification_results.json", "w", encoding="utf-8") as f:
            json.dump(self.results, f, indent=2)
        print("Results saved to pod_verification_results.json")
        return overall_ok
def main():
    """Run the verification suite and exit 0 on success, 1 otherwise."""
    exit_code = 0 if PodVerifier().run_verification() else 1
    sys.exit(exit_code)


if __name__ == "__main__":
    main()

View File

@@ -1,13 +0,0 @@
{
"pod_id": "8lfr3j47a5r3gn",
"endpoint": "https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
"timestamp": "2026-04-13T18:13:23.428145",
"api_tags_ok": false,
"api_tags_time": 1.29398512840271,
"models": [],
"generate_ok": false,
"generate_time": 2.1550090312957764,
"generate_response": "",
"overall_ok": false,
"cost_per_hour": 0.79
}

View File

@@ -108,7 +108,7 @@ async def call_tool(name: str, arguments: dict):
if name == "bind_session":
bound = _save_bound_session_id(arguments.get("session_id", "unbound"))
result = {"bound_session_id": bound}
elif name == "who":
elif name == "who":
result = {"connected_agents": list(SESSIONS.keys())}
elif name == "status":
result = {"connected_sessions": sorted(SESSIONS.keys()), "bound_session_id": _load_bound_session_id()}

View File

@@ -1,14 +0,0 @@
{
"timestamp": "2026-04-13T18:15:09.502997",
"pod_id": "8lfr3j47a5r3gn",
"endpoint": "https://8lfr3j47a5r3gn-11434.proxy.runpod.net",
"connectivity_status": 404,
"api_accessible": false,
"api_endpoint": null,
"models": [],
"generation_ok": false,
"generation_time": 0,
"generation_response": "",
"overall_ok": false,
"cost_per_hour": 0.79
}

View File

@@ -1,176 +0,0 @@
#!/usr/bin/env python3
"""
Big Brain Pod Verification Script
Verifies that the Big Brain pod is live with gemma3:27b model.
Issue #573: [BIG-BRAIN] Verify pod live: gemma3:27b pulled and responding
"""
import requests
import time
import json
import sys
from datetime import datetime
# Pod under verification; the proxy URL is derived from the pod id.
POD_ID = "8lfr3j47a5r3gn"
ENDPOINT = "https://" + POD_ID + "-11434.proxy.runpod.net"
COST_PER_HOUR = 0.79  # USD
def check_api_tags():
    """Query ``/api/tags`` and report whether gemma3:27b is listed.

    Returns:
        ``(ok, elapsed, models)``. ``ok`` is True only when the endpoint
        answered 200 and some listed model name contains ``gemma3:27b``.
        ``elapsed`` is the request duration in seconds, or 0 when the
        request or the JSON decoding raised. ``models`` holds the listed
        model names and is empty for every non-200 outcome.
    """
    print(f"[{datetime.now().isoformat()}] Checking /api/tags endpoint...")
    try:
        start_time = time.time()
        response = requests.get(f"{ENDPOINT}/api/tags", timeout=10)
        elapsed = time.time() - start_time
        print(f" Response status: {response.status_code}")
        print(f" Response headers: {dict(response.headers)}")
        if response.status_code == 200:
            data = response.json()
            # "or ''" keeps a null name from breaking the .lower() below.
            models = [model.get("name") or "" for model in data.get("models", [])]
            print(f" ✓ API responded in {elapsed:.2f}s")
            print(f" Available models: {models}")
            # Check for gemma3:27b
            has_gemma = any("gemma3:27b" in model.lower() for model in models)
            if has_gemma:
                print(" ✓ gemma3:27b found in model list")
                return True, elapsed, models
            else:
                print(" ✗ gemma3:27b NOT found in model list")
                return False, elapsed, models
        elif response.status_code == 404:
            print(" ✗ API endpoint not found (404)")
            print(" This might mean Ollama is not running or endpoint is wrong")
            print(" Trying to ping the server...")
            try:
                ping_response = requests.get(f"{ENDPOINT}/", timeout=5)
                print(f" Ping response: {ping_response.status_code}")
            except Exception:
                # Best-effort ping: report and continue, but let
                # KeyboardInterrupt/SystemExit propagate.
                print(" Ping failed - server unreachable")
            return False, elapsed, []
        else:
            print(f" ✗ API returned status {response.status_code}")
            return False, elapsed, []
    except Exception as e:
        print(f" ✗ Error checking API tags: {e}")
        return False, 0, []
def test_generate():
    """POST a short prompt to ``/api/generate`` and time the reply.

    Returns:
        ``(ok, elapsed, text)``. ``ok`` is True only for an HTTP 200 reply
        that took less than 30 seconds. A non-200 reply yields
        ``(False, elapsed, "")`` and any exception yields ``(False, 0, "")``.
    """
    print(f"[{datetime.now().isoformat()}] Testing /api/generate endpoint...")
    request_body = dict(
        model="gemma3:27b",
        prompt="Say hello in one word.",
        stream=False,
        options=dict(num_predict=10),
    )
    try:
        began = time.time()
        reply = requests.post(ENDPOINT + "/api/generate", json=request_body, timeout=30)
        elapsed = time.time() - began

        # Guard clause: anything but 200 is an immediate failure.
        if reply.status_code != 200:
            print(f" ✗ Generate returned status {reply.status_code}")
            return False, elapsed, ""

        text = reply.json().get("response", "").strip()
        print(f" ✓ Generate responded in {elapsed:.2f}s")
        print(f" Response: {text[:100]}...")

        if elapsed >= 30:
            print(f" ✗ Response time {elapsed:.2f}s exceeds 30s limit")
            return False, elapsed, text

        print(" ✓ Response time under 30 seconds")
        return True, elapsed, text
    except Exception as e:
        print(f" ✗ Error testing generate: {e}")
        return False, 0, ""
def check_uptime():
    """Print and return the moment this verification ran.

    No pod start time is looked up here; the current local time stands in
    as the only uptime-related data point.
    """
    now = datetime.now()
    stamp = now.isoformat()
    print(f"[{stamp}] Pod verification timestamp")
    return now
def main():
    """Run all checks, print a summary, save it as JSON, then exit.

    The process exits with status 0 when both the tags check and the
    generate test passed, and with status 1 otherwise.
    """
    rule = "=" * 60

    def verdict(passed):
        # Shared pass/fail label for the summary lines.
        return '✓ PASS' if passed else '✗ FAIL'

    print(rule)
    print("Big Brain Pod Verification")
    print(f"Pod ID: {POD_ID}")
    print(f"Endpoint: {ENDPOINT}")
    print(f"Cost: ${COST_PER_HOUR}/hour")
    print(rule)
    print()

    # Timestamp first, then the two network checks, each followed by a gap.
    checked_at = check_uptime()
    print()
    tags_ok, tags_time, models = check_api_tags()
    print()
    generate_ok, generate_time, reply_text = test_generate()
    print()

    print(rule)
    print("VERIFICATION SUMMARY")
    print(rule)
    print(f"API Tags Check: {verdict(tags_ok)}")
    print(f" Response time: {tags_time:.2f}s")
    print(f" Models found: {len(models)}")
    print()
    print(f"Generate Test: {verdict(generate_ok)}")
    print(f" Response time: {generate_time:.2f}s")
    under_limit = '✓ YES' if generate_time < 30 else '✗ NO'
    print(f" Under 30s: {under_limit}")
    print()

    overall_ok = tags_ok and generate_ok
    status_line = '✓ POD LIVE' if overall_ok else '✗ POD ISSUES'
    print(f"Overall Status: {status_line}")

    print()
    print(f"Cost Awareness: Pod costs ${COST_PER_HOUR}/hour")
    print(f"Verification time: {checked_at.strftime('%Y-%m-%d %H:%M:%S')}")

    # Persist the outcome next to wherever the script was started.
    report = {
        "pod_id": POD_ID,
        "endpoint": ENDPOINT,
        "timestamp": checked_at.isoformat(),
        "api_tags_ok": tags_ok,
        "api_tags_time": tags_time,
        "models": models,
        "generate_ok": generate_ok,
        "generate_time": generate_time,
        "generate_response": reply_text[:200] if reply_text else "",
        "overall_ok": overall_ok,
        "cost_per_hour": COST_PER_HOUR,
    }
    with open("big_brain_verification.json", "w") as out:
        json.dump(report, out, indent=2)
    print()
    print("Results saved to big_brain_verification.json")

    sys.exit(0 if overall_ok else 1)


if __name__ == "__main__":
    main()

View File

@@ -24,7 +24,7 @@ class HealthCheckHandler(BaseHTTPRequestHandler):
# Suppress default logging
pass
def do_GET(self):
def do_GET(self):
"""Handle GET requests"""
if self.path == '/health':
self.send_health_response()