Files
timmy-home/uni-wizard/tools/system_tools.py

378 lines
11 KiB
Python
Raw Normal View History

"""
System Tools for Uni-Wizard
Monitor and control the VPS environment
"""
import os
import json
import subprocess
import platform
import psutil
from datetime import datetime, timedelta
from typing import Dict, List, Optional
from .registry import tool, registry
@tool(category="system")
def system_info() -> str:
    """
    Get comprehensive system information.

    Collects the hostname, OS details, CPU count/utilisation/frequency,
    memory usage, usage of the root filesystem ('/'), boot time/uptime and
    the 1/5/15 minute load averages.

    Note: CPU utilisation is sampled with ``interval=1``, so this call
    blocks for roughly one second.

    Returns:
        JSON string with OS, CPU, memory, disk, and uptime info, or a plain
        "Error getting system info: ..." string if collection fails.
    """
    try:
        # CPU info
        cpu_count = psutil.cpu_count()
        cpu_percent = psutil.cpu_percent(interval=1)
        cpu_freq = psutil.cpu_freq()

        # Memory info
        memory = psutil.virtual_memory()

        # Disk info (guard the division so a zero-sized filesystem cannot
        # abort the whole report)
        disk = psutil.disk_usage('/')
        if disk.total:
            disk_percent = round((disk.used / disk.total) * 100, 1)
        else:
            disk_percent = 0.0

        # Uptime
        boot_time = datetime.fromtimestamp(psutil.boot_time())
        uptime_seconds = int((datetime.now() - boot_time).total_seconds())

        # Load average: os.getloadavg() does not exist on every platform and
        # raises OSError when the value is unobtainable; fall back to zeros
        # instead of failing the whole call.
        try:
            load_avg = os.getloadavg()
        except (AttributeError, OSError):
            load_avg = [0, 0, 0]

        info = {
            "hostname": platform.node(),
            "os": {
                "system": platform.system(),
                "release": platform.release(),
                "version": platform.version(),
                "machine": platform.machine()
            },
            "cpu": {
                "count": cpu_count,
                "percent": cpu_percent,
                "frequency_mhz": cpu_freq.current if cpu_freq else None
            },
            "memory": {
                "total_gb": round(memory.total / (1024**3), 2),
                "available_gb": round(memory.available / (1024**3), 2),
                "percent_used": memory.percent
            },
            "disk": {
                "total_gb": round(disk.total / (1024**3), 2),
                "free_gb": round(disk.free / (1024**3), 2),
                "percent_used": disk_percent
            },
            "uptime": {
                "boot_time": boot_time.isoformat(),
                "uptime_seconds": uptime_seconds,
                "uptime_human": str(timedelta(seconds=uptime_seconds))
            },
            "load_average": {
                "1min": round(load_avg[0], 2),
                "5min": round(load_avg[1], 2),
                "15min": round(load_avg[2], 2)
            }
        }
        return json.dumps(info, indent=2)
    except Exception as e:
        return f"Error getting system info: {str(e)}"
@tool(category="system")
def process_list(filter_name: Optional[str] = None) -> str:
    """
    List running processes with optional name filter.

    Processes are sorted by CPU usage (highest first) and only the top 50
    are returned; "count" is the number of matching processes before that
    truncation.

    NOTE(review): psutil documents per-process cpu_percent as relative to
    the previous measurement, so the first invocation may report 0.0 for
    every process - confirm whether that matters for callers.

    Args:
        filter_name: Optional process name to filter by (case-insensitive
            substring match)

    Returns:
        JSON object with "count", "filter" and "processes" (PID, name, CPU%,
        memory%, status), or a plain "Error listing processes: ..." string
        on failure.
    """
    try:
        needle = filter_name.lower() if filter_name else None
        processes = []
        for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent', 'status']):
            try:
                info = proc.info
                # Attributes the current user may not read come back as
                # None; treat a missing name as an empty string so the
                # filter cannot crash on it.
                if needle and needle not in (info['name'] or '').lower():
                    continue
                processes.append({
                    "pid": info['pid'],
                    "name": info['name'],
                    # Normalise None to 0.0 so the sort below never has to
                    # compare None with a float.
                    "cpu_percent": info['cpu_percent'] or 0.0,
                    "memory_percent": round(info['memory_percent'], 2) if info['memory_percent'] else 0,
                    "status": info['status']
                })
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                # Process vanished or is off-limits: skip it.
                continue

        # Sort by CPU usage
        processes.sort(key=lambda entry: entry['cpu_percent'], reverse=True)

        return json.dumps({
            "count": len(processes),
            "filter": filter_name,
            "processes": processes[:50]  # Limit to top 50
        }, indent=2)
    except Exception as e:
        return f"Error listing processes: {str(e)}"
# Field labels printed in the header of `systemctl status`, mapped to the
# keys used in the JSON report.
_STATUS_FIELDS = {
    'Active': 'active',
    'Loaded': 'loaded',
    'Main PID': 'pid',
    'Memory': 'memory',
    'CPU': 'cpu',
}


def _parse_status_header(output: str) -> Dict[str, str]:
    """Extract the known fields from the header of `systemctl status` output."""
    fields = {}
    seen_content = False
    for line in output.split('\n'):
        stripped = line.strip()
        if not stripped:
            if seen_content:
                # The first blank line ends the header; what follows is
                # recent log output, which must not overwrite header values.
                break
            continue
        seen_content = True
        # Match on the label at the start of the line rather than anywhere
        # in it, so process command lines cannot be mistaken for fields.
        label, sep, value = stripped.partition(':')
        if sep and label in _STATUS_FIELDS:
            fields[_STATUS_FIELDS[label]] = value.strip()
    return fields


@tool(category="system")
def service_status(service_name: str) -> str:
    """
    Check systemd service status.

    Runs `systemctl status` for the unit and reports the Active, Loaded,
    Main PID, Memory and CPU header fields (when present) together with the
    command's exit code.

    Args:
        service_name: Name of the service (e.g., 'llama-server', 'syncthing@root')

    Returns:
        JSON string with "service", the parsed fields and "exit_code", or a
        plain "Error checking service status: ..." string on failure.
    """
    try:
        result = subprocess.run(
            # '--' ends option parsing so unit names that begin with '-'
            # (such as '-.mount') are not interpreted as options.
            ['systemctl', 'status', '--no-pager', '--', service_name],
            capture_output=True,
            text=True
        )

        status_info = {"service": service_name}
        status_info.update(_parse_status_header(result.stdout))
        status_info['exit_code'] = result.returncode

        return json.dumps(status_info, indent=2)
    except Exception as e:
        return f"Error checking service status: {str(e)}"
@tool(category="system")
def service_control(service_name: str, action: str) -> str:
    """
    Control a systemd service (start, stop, restart, enable, disable).

    The action is validated against a fixed list before `systemctl` is
    invoked. For the "status" action the captured status text is appended
    to the success message. When the command fails without writing to
    stderr (as a `status` query with a non-zero exit code does), its stdout
    is used as the failure detail instead of an empty string.

    Args:
        service_name: Name of the service
        action: start, stop, restart, enable, disable, status

    Returns:
        Result of the action as a human-readable message
    """
    valid_actions = ['start', 'stop', 'restart', 'enable', 'disable', 'status']

    if action not in valid_actions:
        return f"Invalid action. Use: {', '.join(valid_actions)}"

    try:
        result = subprocess.run(
            # '--' ends option parsing so a unit name that begins with '-'
            # is never interpreted as a systemctl option.
            ['systemctl', action, '--', service_name],
            capture_output=True,
            text=True
        )
    except Exception as e:
        return f"Error controlling service: {str(e)}"

    if result.returncode == 0:
        message = f"✓ Service '{service_name}' {action} successful"
        status_text = result.stdout.strip()
        if action == 'status' and status_text:
            # A status query is only useful if its output is returned.
            message += "\n" + status_text
        return message

    detail = result.stderr or result.stdout
    return f"✗ Service '{service_name}' {action} failed: {detail}"
@tool(category="system")
def health_check() -> str:
    """
    Health check of the VPS.

    Checks:
        - Memory and root-filesystem usage ("warning" at 90% used or more)
        - Inference endpoint (GET http://127.0.0.1:8081/health, 5s timeout)
        - systemd services 'llama-server' and 'syncthing@root'

    "overall" starts as "healthy" and becomes "degraded" when the inference
    endpoint or one of the services is down. Resource warnings alone do not
    change it.

    Returns:
        JSON health report with a timestamp, the overall status and the
        individual checks, or a plain "Error running health check: ..."
        string on failure.
    """
    import urllib.request

    try:
        health = {
            "timestamp": datetime.now().isoformat(),
            "overall": "healthy",
            "checks": {}
        }

        # System resources
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage('/')

        health["checks"]["memory"] = {
            "status": "healthy" if memory.percent < 90 else "warning",
            "percent_used": memory.percent,
            "available_gb": round(memory.available / (1024**3), 2)
        }
        health["checks"]["disk"] = {
            "status": "healthy" if disk.percent < 90 else "warning",
            "percent_used": disk.percent,
            "free_gb": round(disk.free / (1024**3), 2)
        }

        # Check inference endpoint
        try:
            # The context manager closes the HTTP response; urlopen raises
            # for connection failures and HTTP error statuses.
            with urllib.request.urlopen('http://127.0.0.1:8081/health', timeout=5):
                pass
            health["checks"]["inference"] = {"status": "healthy", "port": 8081}
        except Exception:
            # Best-effort probe: any failure means "down". Exception (not a
            # bare except) lets KeyboardInterrupt/SystemExit propagate.
            health["checks"]["inference"] = {"status": "down", "port": 8081}
            health["overall"] = "degraded"

        # Check services
        services = ['llama-server', 'syncthing@root']
        for svc in services:
            result = subprocess.run(['systemctl', 'is-active', svc], capture_output=True, text=True)
            health["checks"][svc] = {
                "status": "healthy" if result.returncode == 0 else "down"
            }
            if result.returncode != 0:
                health["overall"] = "degraded"

        return json.dumps(health, indent=2)
    except Exception as e:
        return f"Error running health check: {str(e)}"
@tool(category="system")
def disk_usage(path: str = "/") -> str:
    """
    Get disk usage for a path.

    Args:
        path: Path to check (default: /)

    Returns:
        JSON string with total/used/free space in GiB and the percentage
        used, or a plain "Error checking disk usage: ..." string on failure.
    """
    try:
        usage = psutil.disk_usage(path)

        # A filesystem can report a total size of 0 (e.g. some
        # pseudo-filesystems); report 0.0% instead of dividing by zero.
        if usage.total:
            percent_used = round((usage.used / usage.total) * 100, 1)
        else:
            percent_used = 0.0

        return json.dumps({
            "path": path,
            "total_gb": round(usage.total / (1024**3), 2),
            "used_gb": round(usage.used / (1024**3), 2),
            "free_gb": round(usage.free / (1024**3), 2),
            "percent_used": percent_used
        }, indent=2)
    except Exception as e:
        return f"Error checking disk usage: {str(e)}"
# Auto-register all tools in this module
def register_all():
    """Register every system tool of this module with the shared registry."""
    tool_specs = [
        {
            "name": "system_info",
            "handler": system_info,
            "description": "Get comprehensive system information (OS, CPU, memory, disk, uptime)",
        },
        {
            "name": "process_list",
            "handler": process_list,
            "description": "List running processes with optional name filter",
            "parameters": {
                "type": "object",
                "properties": {
                    "filter_name": {
                        "type": "string",
                        "description": "Optional process name to filter by"
                    }
                }
            },
        },
        {
            "name": "service_status",
            "handler": service_status,
            "description": "Check systemd service status",
            "parameters": {
                "type": "object",
                "properties": {
                    "service_name": {
                        "type": "string",
                        "description": "Name of the systemd service"
                    }
                },
                "required": ["service_name"]
            },
        },
        {
            "name": "service_control",
            "handler": service_control,
            "description": "Control a systemd service (start, stop, restart, enable, disable)",
            "parameters": {
                "type": "object",
                "properties": {
                    "service_name": {
                        "type": "string",
                        "description": "Name of the service"
                    },
                    "action": {
                        "type": "string",
                        "enum": ["start", "stop", "restart", "enable", "disable", "status"],
                        "description": "Action to perform"
                    }
                },
                "required": ["service_name", "action"]
            },
        },
        {
            "name": "health_check",
            "handler": health_check,
            "description": "Comprehensive health check of VPS (resources, services, inference)",
        },
        {
            "name": "disk_usage",
            "handler": disk_usage,
            "description": "Get disk usage for a path",
            "parameters": {
                "type": "object",
                "properties": {
                    "path": {
                        "type": "string",
                        "description": "Path to check",
                        "default": "/"
                    }
                }
            },
        },
    ]

    # Tools that take no arguments have no "parameters" entry, so that
    # keyword is simply not passed for them.
    for spec in tool_specs:
        registry.register(**spec, category="system")
# Called at module level, so the registrations run as soon as this module is imported.
register_all()