378 lines
11 KiB
Python
378 lines
11 KiB
Python
|
|
"""
|
||
|
|
System Tools for Uni-Wizard
|
||
|
|
Monitor and control the VPS environment
|
||
|
|
"""
|
||
|
|
|
||
|
|
import os
|
||
|
|
import json
|
||
|
|
import subprocess
|
||
|
|
import platform
|
||
|
|
import psutil
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from typing import Dict, List, Optional
|
||
|
|
|
||
|
|
from .registry import tool, registry
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def system_info() -> str:
    """
    Collect a snapshot of the host: OS, CPU, memory, root disk, uptime, load.

    Returns:
        Pretty-printed JSON string on success, or a plain
        "Error getting system info: ..." message if any step raises.
    """
    bytes_per_gib = 1024**3

    def to_gib(num_bytes):
        # Convert a byte count to GiB, rounded to two decimals.
        return round(num_bytes / bytes_per_gib, 2)

    try:
        # CPU: cpu_percent is sampled with interval=1, so this call waits
        # for that sampling window before returning.
        cores = psutil.cpu_count()
        busy_pct = psutil.cpu_percent(interval=1)
        freq = psutil.cpu_freq()

        # Memory and root filesystem
        ram = psutil.virtual_memory()
        root_fs = psutil.disk_usage('/')

        # Uptime, derived from the boot timestamp (both values are naive
        # local datetimes)
        booted_at = datetime.fromtimestamp(psutil.boot_time())
        uptime_secs = int((datetime.now() - booted_at).total_seconds())

        # Load average; zeros when os.getloadavg does not exist on this platform
        if hasattr(os, 'getloadavg'):
            load_1, load_5, load_15 = os.getloadavg()
        else:
            load_1, load_5, load_15 = 0, 0, 0

        # cpu_freq() may yield a falsy value; report null in that case
        if freq:
            freq_mhz = freq.current
        else:
            freq_mhz = None

        os_section = {
            "system": platform.system(),
            "release": platform.release(),
            "version": platform.version(),
            "machine": platform.machine(),
        }
        cpu_section = {
            "count": cores,
            "percent": busy_pct,
            "frequency_mhz": freq_mhz,
        }
        memory_section = {
            "total_gb": to_gib(ram.total),
            "available_gb": to_gib(ram.available),
            "percent_used": ram.percent,
        }
        disk_section = {
            "total_gb": to_gib(root_fs.total),
            "free_gb": to_gib(root_fs.free),
            "percent_used": round((root_fs.used / root_fs.total) * 100, 1),
        }
        uptime_section = {
            "boot_time": booted_at.isoformat(),
            "uptime_seconds": uptime_secs,
            "uptime_human": str(timedelta(seconds=uptime_secs)),
        }
        load_section = {
            "1min": round(load_1, 2),
            "5min": round(load_5, 2),
            "15min": round(load_15, 2),
        }

        report = {
            "hostname": platform.node(),
            "os": os_section,
            "cpu": cpu_section,
            "memory": memory_section,
            "disk": disk_section,
            "uptime": uptime_section,
            "load_average": load_section,
        }
        return json.dumps(report, indent=2)

    except Exception as exc:
        return f"Error getting system info: {exc}"
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def process_list(filter_name: Optional[str] = None) -> str:
    """
    List running processes with optional name filter.

    Args:
        filter_name: Optional case-insensitive substring. Only processes
            whose name contains it are kept. None or "" disables filtering.

    Returns:
        JSON object with "count" (number of matching processes before
        truncation), "filter" (the filter as given) and "processes" (at most
        50 entries, highest CPU usage first, each with pid, name,
        cpu_percent, memory_percent and status). On unexpected failure, a
        plain "Error listing processes: ..." string.
    """
    try:
        # Lower-case the filter once instead of once per process.
        needle = filter_name.lower() if filter_name else None

        processes = []
        for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent', 'status']):
            try:
                info = proc.info
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                continue

            # An attribute psutil could not read comes back as None (see the
            # process_iter ad_value parameter), so the name must not be
            # assumed to be a string when matching.
            name = info['name'] or ""
            if needle and needle not in name.lower():
                continue

            processes.append({
                "pid": info['pid'],
                "name": info['name'],
                "cpu_percent": info['cpu_percent'],
                "memory_percent": round(info['memory_percent'], 2) if info['memory_percent'] else 0,
                "status": info['status']
            })

        # Sort by CPU usage; a missing (None) reading is ranked as 0 so it
        # cannot break the comparison.
        processes.sort(key=lambda entry: entry['cpu_percent'] or 0, reverse=True)

        return json.dumps({
            "count": len(processes),
            "filter": filter_name,
            "processes": processes[:50]  # Limit to top 50
        }, indent=2)

    except Exception as e:
        return f"Error listing processes: {str(e)}"
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def service_status(service_name: str) -> str:
    """
    Check systemd service status.

    Runs `systemctl status` for the unit and extracts the header fields.

    Args:
        service_name: Name of the service (e.g., 'llama-server', 'syncthing@root')

    Returns:
        JSON object with "service", whichever of "active", "loaded", "pid",
        "memory" and "cpu" appear in the status header, and "exit_code"
        (the systemctl return code). On failure to run the command, a plain
        "Error checking service status: ..." string.
    """
    # Header labels printed by `systemctl status` mapped to result keys.
    header_fields = {
        'Active:': 'active',
        'Loaded:': 'loaded',
        'Main PID:': 'pid',
        'Memory:': 'memory',
        'CPU:': 'cpu',
    }

    try:
        result = subprocess.run(
            # "--" ends option parsing so a name beginning with "-" is
            # treated as a unit name, never as a systemctl option.
            ['systemctl', 'status', '--no-pager', '--', service_name],
            capture_output=True,
            text=True
        )

        status_info = {"service": service_name}

        seen_content = False
        for line in result.stdout.split('\n'):
            stripped = line.strip()
            if not stripped:
                if seen_content:
                    # The header block ends at the first blank line; what
                    # follows is recent log output, which may contain the
                    # same labels and must not overwrite the real fields.
                    break
                continue
            seen_content = True

            # Match the label at the start of the line only, so process
            # command lines listed under CGroup cannot be mistaken for fields.
            for label, key in header_fields.items():
                if stripped.startswith(label):
                    status_info[key] = stripped[len(label):].strip()
                    break

        status_info['exit_code'] = result.returncode

        return json.dumps(status_info, indent=2)

    except Exception as e:
        return f"Error checking service status: {str(e)}"
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def service_control(service_name: str, action: str) -> str:
    """
    Control a systemd service (start, stop, restart, enable, disable, status).

    Args:
        service_name: Name of the service
        action: start, stop, restart, enable, disable, status

    Returns:
        "✓ ..." message when systemctl exits with 0, "✗ ... failed: <detail>"
        otherwise, an "Invalid action..." message for an unsupported action,
        or "Error controlling service: ..." if the command could not be run.
    """
    valid_actions = ['start', 'stop', 'restart', 'enable', 'disable', 'status']

    if action not in valid_actions:
        return f"Invalid action. Use: {', '.join(valid_actions)}"

    try:
        result = subprocess.run(
            # "--" ends option parsing so a name beginning with "-" is
            # treated as a unit name, never as a systemctl option.
            ['systemctl', action, '--', service_name],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            return f"✓ Service '{service_name}' {action} successful"

        # A non-zero exit does not always come with stderr text (the
        # `status` action prints its report on stdout), so fall back to
        # stdout rather than returning an empty detail.
        detail = result.stderr if result.stderr.strip() else result.stdout
        return f"✗ Service '{service_name}' {action} failed: {detail}"

    except Exception as e:
        return f"Error controlling service: {str(e)}"
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def health_check() -> str:
    """
    Comprehensive health check of the VPS.

    Checks:
    - System resources (memory and root disk; "warning" at 90% used or more)
    - Inference endpoint (HTTP GET http://127.0.0.1:8081/health, 5s timeout)
    - Critical services via `systemctl is-active` (llama-server, syncthing@root)

    Returns:
        JSON object with "timestamp", "overall" ("healthy", or "degraded"
        when the inference endpoint or a service is down) and per-check
        details under "checks". On unexpected failure, a plain
        "Error running health check: ..." string.
    """
    try:
        health = {
            "timestamp": datetime.now().isoformat(),
            "overall": "healthy",
            "checks": {}
        }

        # System resources
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        health["checks"]["memory"] = {
            "status": "healthy" if memory.percent < 90 else "warning",
            "percent_used": memory.percent,
            "available_gb": round(memory.available / (1024**3), 2)
        }

        health["checks"]["disk"] = {
            "status": "healthy" if disk.percent < 90 else "warning",
            "percent_used": disk.percent,
            "free_gb": round(disk.free / (1024**3), 2)
        }

        # Check inference endpoint
        try:
            import urllib.request
            # The context manager closes the response; only reachability
            # matters here, so the body is not read.
            with urllib.request.urlopen('http://127.0.0.1:8081/health', timeout=5):
                pass
            health["checks"]["inference"] = {"status": "healthy", "port": 8081}
        except Exception:
            # Any failure to reach the endpoint counts as "down".
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            health["checks"]["inference"] = {"status": "down", "port": 8081}
            health["overall"] = "degraded"

        # Check services
        services = ['llama-server', 'syncthing@root']
        for svc in services:
            result = subprocess.run(['systemctl', 'is-active', svc], capture_output=True, text=True)
            health["checks"][svc] = {
                "status": "healthy" if result.returncode == 0 else "down"
            }
            if result.returncode != 0:
                health["overall"] = "degraded"

        return json.dumps(health, indent=2)

    except Exception as e:
        return f"Error running health check: {str(e)}"
|
||
|
|
|
||
|
|
|
||
|
|
@tool(category="system")
def disk_usage(path: str = "/") -> str:
    """
    Get disk usage for a path.

    Args:
        path: Path to check (default: /)

    Returns:
        JSON object with "path", "total_gb", "used_gb", "free_gb" and
        "percent_used", or a plain "Error checking disk usage: ..." string
        if the lookup fails (for example, when the path does not exist).
    """
    try:
        usage = psutil.disk_usage(path)

        # A filesystem may report a total size of 0 (seen on some virtual
        # mounts); report 0.0% instead of failing on a division by zero.
        if usage.total:
            percent_used = round((usage.used / usage.total) * 100, 1)
        else:
            percent_used = 0.0

        return json.dumps({
            "path": path,
            "total_gb": round(usage.total / (1024**3), 2),
            "used_gb": round(usage.used / (1024**3), 2),
            "free_gb": round(usage.free / (1024**3), 2),
            "percent_used": percent_used
        }, indent=2)
    except Exception as e:
        return f"Error checking disk usage: {str(e)}"
|
||
|
|
|
||
|
|
|
||
|
|
# Auto-register all tools in this module
|
||
|
|
def register_all():
    """Register every system tool defined in this module with the registry."""
    # One entry per tool, in registration order. Entries without a
    # "parameters" key are registered without that argument.
    tool_specs = [
        {
            "name": "system_info",
            "handler": system_info,
            "description": "Get comprehensive system information (OS, CPU, memory, disk, uptime)",
            "category": "system",
        },
        {
            "name": "process_list",
            "handler": process_list,
            "description": "List running processes with optional name filter",
            "parameters": {
                "type": "object",
                "properties": {
                    "filter_name": {
                        "type": "string",
                        "description": "Optional process name to filter by",
                    },
                },
            },
            "category": "system",
        },
        {
            "name": "service_status",
            "handler": service_status,
            "description": "Check systemd service status",
            "parameters": {
                "type": "object",
                "properties": {
                    "service_name": {
                        "type": "string",
                        "description": "Name of the systemd service",
                    },
                },
                "required": ["service_name"],
            },
            "category": "system",
        },
        {
            "name": "service_control",
            "handler": service_control,
            "description": "Control a systemd service (start, stop, restart, enable, disable)",
            "parameters": {
                "type": "object",
                "properties": {
                    "service_name": {
                        "type": "string",
                        "description": "Name of the service",
                    },
                    "action": {
                        "type": "string",
                        "enum": ["start", "stop", "restart", "enable", "disable", "status"],
                        "description": "Action to perform",
                    },
                },
                "required": ["service_name", "action"],
            },
            "category": "system",
        },
        {
            "name": "health_check",
            "handler": health_check,
            "description": "Comprehensive health check of VPS (resources, services, inference)",
            "category": "system",
        },
        {
            "name": "disk_usage",
            "handler": disk_usage,
            "description": "Get disk usage for a path",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Path to check",
                        "default": "/",
                    },
                },
            },
            "category": "system",
        },
    ]

    for spec in tool_specs:
        registry.register(**spec)
|
||
|
|
|
||
|
|
|
||
|
|
# Called at module top level, so registration happens as an import-time side effect.
register_all()
|