201 lines
6.0 KiB
Python
201 lines
6.0 KiB
Python
"""
Health Check Daemon for Uni-Wizard

Monitors VPS status and exposes HTTP health endpoints
(/health, /status, /metrics).
"""
|
|
|
|
import json
|
|
import time
|
|
import threading
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
import sys
|
|
|
|
# Add parent to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
from harness import get_harness
|
|
|
|
|
|
class HealthCheckHandler(BaseHTTPRequestHandler):
    """HTTP handler for the health daemon's read-only JSON endpoints.

    Routes (GET only):
        /health  - result of the harness "health_check" command
        /status  - harness status plus "system_info" and "health_check"
        /metrics - most recent row of the ``sovereignty_score`` table
    """

    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        pass

    def do_GET(self):
        """Dispatch a GET request to its endpoint; unknown paths get a 404."""
        routes = {
            '/health': self.send_health_response,
            '/status': self.send_full_status,
            '/metrics': self.send_sovereign_metrics,
        }
        # Ignore any query string so "/health?probe=1" still matches.
        endpoint = self.path.split('?', 1)[0]
        handler = routes.get(endpoint)
        if handler is None:
            self.send_error(404)
        else:
            handler()

    def _send_json(self, payload, status=200, indent=None):
        """Serialize *payload* as JSON and write a complete HTTP response.

        Args:
            payload: JSON-serializable object to send as the body.
            status: HTTP status code for the response line.
            indent: Passed to ``json.dumps``; ``None`` gives compact output.
        """
        body = json.dumps(payload, indent=indent).encode()
        self.send_response(status)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def send_health_response(self):
        """Send the result of the harness "health_check" command as JSON.

        Responds 200 with the parsed result, or 503 with an ``error`` field
        when the check cannot be executed or its output is not valid JSON.
        """
        try:
            health = json.loads(get_harness().execute("health_check"))
        except Exception as e:
            self._send_json(
                {"timestamp": datetime.now().isoformat(), "error": str(e)},
                status=503,
            )
            return

        # NOTE(review): the payload is passed through without inspection. If
        # the harness result carries an overall pass/fail flag, map it to a
        # non-200 status here -- confirm against the harness schema.
        self._send_json(
            {"timestamp": datetime.now().isoformat(), "health": health}
        )

    def send_sovereign_metrics(self):
        """Send sovereign health metrics as JSON.

        Always responds 200; failures are reported through an ``error`` key
        in the body, so callers checking only the status code are unaffected.
        """
        try:
            data = self._read_sovereign_metrics()
        except Exception as e:
            data = {"error": str(e)}
        self._send_json(data)

    def _read_sovereign_metrics(self):
        """Return the latest sovereignty_score row as a dict.

        Returns an ``{"error": ...}`` dict when the database file or the row
        is missing. Database errors propagate to the caller.
        """
        import sqlite3

        db_path = Path.home() / ".timmy" / "metrics" / "model_metrics.db"
        if not db_path.exists():
            return {"error": "No database found"}

        conn = sqlite3.connect(str(db_path))
        try:
            row = conn.execute("""
                SELECT local_pct, total_sessions, local_sessions, cloud_sessions, est_cloud_cost, est_saved
                FROM sovereignty_score ORDER BY timestamp DESC LIMIT 1
            """).fetchone()
        finally:
            # Close even when the query fails (e.g. missing table).
            conn.close()

        if not row:
            return {"error": "No data"}

        return {
            "sovereignty_score": row[0],
            "total_sessions": row[1],
            "local_sessions": row[2],
            "cloud_sessions": row[3],
            "est_cloud_cost": row[4],
            "est_saved": row[5],
            # Time of this response, not the timestamp stored with the row.
            "timestamp": datetime.now().isoformat(),
        }

    def send_full_status(self):
        """Send full system status as indented JSON.

        Responds 500 with an ``error`` field if the harness cannot be
        queried or returns output that is not valid JSON.
        """
        try:
            harness = get_harness()
            status = {
                "timestamp": datetime.now().isoformat(),
                "harness": json.loads(harness.get_status()),
                "system": json.loads(harness.execute("system_info")),
                "health": json.loads(harness.execute("health_check")),
            }
        except Exception as e:
            self._send_json(
                {"timestamp": datetime.now().isoformat(), "error": str(e)},
                status=500,
            )
            return

        self._send_json(status, indent=2)
|
|
|
|
|
|
class HealthDaemon:
    """
    Health monitoring daemon.

    Runs continuously, monitoring:
    - System resources
    - Service status
    - Inference endpoint

    Exposes:
    - HTTP endpoint on port 8082 (default; see ``port``)
    - JSON status file at ~/timmy/logs/health.json
    """

    def __init__(self, port: int = 8082, check_interval: int = 60):
        """Configure the daemon and make sure the log directory exists.

        Args:
            port: TCP port on 127.0.0.1 for the HTTP endpoints.
            check_interval: Seconds between health-file refreshes.
        """
        self.port = port
        self.check_interval = check_interval
        self.running = False
        self.server = None
        self.monitor_thread = None
        self.last_health = None
        # Set by stop() so the monitor loop wakes immediately instead of
        # finishing a full check_interval sleep.
        self._stop_event = threading.Event()

        # Ensure log directory exists
        self.log_path = Path.home() / "timmy" / "logs"
        self.log_path.mkdir(parents=True, exist_ok=True)
        self.health_file = self.log_path / "health.json"

    def start(self):
        """Start the HTTP server and the monitoring loop in daemon threads.

        Does nothing if the daemon is already running.

        Raises:
            OSError: If the HTTP server cannot bind to the port.
        """
        if self.running:
            return

        # Bind before flipping the running flag so a failed bind does not
        # leave the daemon marked as running.
        self.server = HTTPServer(('127.0.0.1', self.port), HealthCheckHandler)
        self.running = True
        self._stop_event.clear()

        # Start HTTP server
        server_thread = threading.Thread(target=self.server.serve_forever)
        server_thread.daemon = True
        server_thread.start()

        # Start monitoring loop
        self.monitor_thread = threading.Thread(target=self._monitor_loop)
        self.monitor_thread.daemon = True
        self.monitor_thread.start()

        # Report the port actually bound (differs from self.port when 0).
        bound_port = self.server.server_address[1]
        print(f"Health daemon started on http://127.0.0.1:{bound_port}")
        print(" - /health - Quick health check")
        print(" - /status - Full system status")
        print(" - /metrics - Sovereignty metrics")
        print(f"Health file: {self.health_file}")

    def stop(self):
        """Stop the monitoring loop and shut down the HTTP server.

        Safe to call before start() or more than once.
        """
        self.running = False
        self._stop_event.set()
        if self.server:
            self.server.shutdown()
            # shutdown() only ends serve_forever(); server_close() releases
            # the listening socket.
            self.server.server_close()
            self.server = None
        print("Health daemon stopped")

    def _monitor_loop(self):
        """Refresh the health file every check_interval seconds until stopped.

        After an unexpected error the next attempt happens after 5 seconds.
        """
        while self.running:
            try:
                self._update_health_file()
                delay = self.check_interval
            except Exception as e:
                print(f"Monitor error: {e}")
                delay = 5
            # wait() returns True as soon as stop() sets the event.
            if self._stop_event.wait(delay):
                break

    def _update_health_file(self):
        """Run the harness checks and write the result to the health file.

        Failures while running the checks or writing the file are printed
        and swallowed; the previous health file is left untouched.
        """
        harness = get_harness()

        try:
            health_result = harness.execute("health_check")
            system_result = harness.execute("system_info")

            status = {
                "timestamp": datetime.now().isoformat(),
                "health": json.loads(health_result),
                "system": json.loads(system_result),
            }

            # Write to a sibling temp file and swap it in so readers never
            # see a partially written health.json.
            tmp_file = self.health_file.with_name(self.health_file.name + ".tmp")
            tmp_file.write_text(json.dumps(status, indent=2))
            tmp_file.replace(self.health_file)
            self.last_health = status

        except Exception as e:
            print(f"Failed to update health file: {e}")
|
|
|
|
|
|
def main():
    """Run the health daemon in the foreground until interrupted."""
    import signal

    daemon = HealthDaemon()

    def _shutdown(signum, _frame):
        # Shared handler for SIGINT and SIGTERM: stop the daemon, exit 0.
        print("\nShutting down...")
        daemon.stop()
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, _shutdown)

    daemon.start()

    # Keep the main thread alive so the process does not exit.
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        daemon.stop()


if __name__ == "__main__":
    main()
|