Compare commits
1 Commits
fix/749-v2
...
burn/714-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c2eb7bac74 |
83
scripts/mcp_watchdog.py
Executable file
83
scripts/mcp_watchdog.py
Executable file
@@ -0,0 +1,83 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
mcp-watchdog — Periodic cleanup of orphaned MCP processes.
|
||||
|
||||
Runs as a cron job or daemon to prevent process accumulation.
|
||||
|
||||
Usage:
|
||||
python scripts/mcp_watchdog.py # one-shot check
|
||||
python scripts/mcp_watchdog.py --daemon # continuous monitoring
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
CHECK_INTERVAL = 300 # 5 minutes
|
||||
MAX_MCP_PROCESSES = 10
|
||||
MAX_PROCESS_AGE = 3600 # 1 hour
|
||||
|
||||
|
||||
def count_mcp_processes() -> int:
|
||||
"""Count running MCP processes."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["pgrep", "-f", "mcp_server|morrowind|mcp-serve|fastmcp"],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return len([p for p in result.stdout.strip().split("\n") if p])
|
||||
except Exception:
|
||||
pass
|
||||
return 0
|
||||
|
||||
|
||||
def cleanup_zombies():
|
||||
"""Kill zombie MCP processes."""
|
||||
script = os.path.join(os.path.dirname(__file__), "mcp_zombie_cleanup.py")
|
||||
if os.path.exists(script):
|
||||
subprocess.run(
|
||||
[sys.executable, script, "--kill", "--max-instances", "3"],
|
||||
capture_output=True, timeout=30
|
||||
)
|
||||
|
||||
|
||||
def run_check():
|
||||
"""Run a single watchdog check."""
|
||||
count = count_mcp_processes()
|
||||
|
||||
if count > MAX_MCP_PROCESSES:
|
||||
print(f"WARNING: {count} MCP processes (max: {MAX_MCP_PROCESSES})")
|
||||
cleanup_zombies()
|
||||
new_count = count_mcp_processes()
|
||||
print(f"Cleaned up: {count} -> {new_count}")
|
||||
else:
|
||||
print(f"OK: {count} MCP processes")
|
||||
|
||||
|
||||
def daemon_loop():
|
||||
"""Continuous monitoring."""
|
||||
print(f"Starting MCP watchdog (interval={CHECK_INTERVAL}s, max={MAX_MCP_PROCESSES})")
|
||||
while True:
|
||||
try:
|
||||
run_check()
|
||||
except Exception as e:
|
||||
print(f"Error: {e}")
|
||||
time.sleep(CHECK_INTERVAL)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="MCP process watchdog")
|
||||
parser.add_argument("--daemon", action="store_true", help="Run continuous monitoring")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.daemon:
|
||||
daemon_loop()
|
||||
else:
|
||||
run_check()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
210
scripts/mcp_zombie_cleanup.py
Executable file
210
scripts/mcp_zombie_cleanup.py
Executable file
@@ -0,0 +1,210 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
mcp-zombie-cleanup — Kill orphaned MCP server processes.
|
||||
|
||||
Fix for #714: ~80 zombie morrowind/mcp_server.py processes on Mac.
|
||||
|
||||
Usage:
|
||||
python scripts/mcp_zombie_cleanup.py # dry run
|
||||
python scripts/mcp_zombie_cleanup.py --kill # kill zombies
|
||||
python scripts/mcp_zombie_cleanup.py --status # show status
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from typing import List, Dict
|
||||
|
||||
# Patterns that identify MCP server processes
|
||||
MCP_PATTERNS = [
|
||||
"mcp_server",
|
||||
"morrowind",
|
||||
"mcp-serve",
|
||||
"mcp_tool",
|
||||
"fastmcp",
|
||||
]
|
||||
|
||||
# Keep at most this many instances per pattern
|
||||
MAX_INSTANCES = 3
|
||||
|
||||
# Kill processes older than this (seconds)
|
||||
MAX_AGE_SECONDS = 3600 # 1 hour
|
||||
|
||||
|
||||
def find_mcp_processes() -> List[Dict]:
|
||||
"""Find all MCP-related processes."""
|
||||
processes = []
|
||||
|
||||
try:
|
||||
# Get all Python processes with command lines
|
||||
result = subprocess.run(
|
||||
["ps", "aux"],
|
||||
capture_output=True, text=True, timeout=10
|
||||
)
|
||||
|
||||
for line in result.stdout.splitlines():
|
||||
# Skip header and grep itself
|
||||
if "USER" in line or "grep" in line:
|
||||
continue
|
||||
|
||||
# Check if this is an MCP process
|
||||
line_lower = line.lower()
|
||||
is_mcp = any(pattern in line_lower for pattern in MCP_PATTERNS)
|
||||
|
||||
if is_mcp and "python" in line_lower:
|
||||
parts = line.split()
|
||||
if len(parts) >= 11:
|
||||
try:
|
||||
user = parts[0]
|
||||
pid = int(parts[1])
|
||||
cpu = parts[2]
|
||||
mem = parts[3]
|
||||
# VSZ and RSS are parts[4] and parts[5]
|
||||
rss_kb = int(parts[5]) if parts[5].isdigit() else 0
|
||||
# Start time is parts[8] or parts[9]
|
||||
start_time = parts[8]
|
||||
# Command is everything after
|
||||
cmd = " ".join(parts[10:])
|
||||
|
||||
processes.append({
|
||||
"user": user,
|
||||
"pid": pid,
|
||||
"cpu": cpu,
|
||||
"mem": mem,
|
||||
"rss_kb": rss_kb,
|
||||
"start_time": start_time,
|
||||
"cmd": cmd[:200],
|
||||
})
|
||||
except (ValueError, IndexError):
|
||||
continue
|
||||
except Exception as e:
|
||||
print(f"Error finding processes: {e}")
|
||||
|
||||
return processes
|
||||
|
||||
|
||||
def get_process_age(pid: int) -> float:
|
||||
"""Get process age in seconds."""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["ps", "-o", "etimes=", "-p", str(pid)],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
if result.returncode == 0 and result.stdout.strip():
|
||||
return float(result.stdout.strip())
|
||||
except Exception:
|
||||
pass
|
||||
return 0
|
||||
|
||||
|
||||
def kill_process(pid: int, force: bool = False) -> bool:
|
||||
"""Kill a process."""
|
||||
try:
|
||||
sig = signal.SIGKILL if force else signal.SIGTERM
|
||||
os.kill(pid, sig)
|
||||
return True
|
||||
except ProcessLookupError:
|
||||
return True # Already dead
|
||||
except PermissionError:
|
||||
print(f" Permission denied for PID {pid}")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f" Error killing PID {pid}: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Clean up zombie MCP processes")
|
||||
parser.add_argument("--kill", action="store_true", help="Kill zombie processes")
|
||||
parser.add_argument("--force", action="store_true", help="Use SIGKILL instead of SIGTERM")
|
||||
parser.add_argument("--status", action="store_true", help="Show status only")
|
||||
parser.add_argument("--max-instances", type=int, default=MAX_INSTANCES)
|
||||
parser.add_argument("--max-age", type=int, default=MAX_AGE_SECONDS)
|
||||
args = parser.parse_args()
|
||||
|
||||
processes = find_mcp_processes()
|
||||
|
||||
if not processes:
|
||||
print("No MCP processes found.")
|
||||
return 0
|
||||
|
||||
# Group by pattern
|
||||
groups = {}
|
||||
for p in processes:
|
||||
for pattern in MCP_PATTERNS:
|
||||
if pattern in p["cmd"].lower():
|
||||
if pattern not in groups:
|
||||
groups[pattern] = []
|
||||
groups[pattern].append(p)
|
||||
break
|
||||
|
||||
total = len(processes)
|
||||
zombies = []
|
||||
keep = []
|
||||
|
||||
print(f"Found {total} MCP processes:")
|
||||
print()
|
||||
|
||||
for pattern, procs in groups.items():
|
||||
# Sort by PID (higher = newer)
|
||||
procs.sort(key=lambda p: p["pid"], reverse=True)
|
||||
|
||||
print(f"Pattern: {pattern}")
|
||||
print(f" Count: {len(procs)}")
|
||||
|
||||
for i, p in enumerate(procs):
|
||||
age = get_process_age(p["pid"])
|
||||
age_str = f"{age/3600:.1f}h" if age > 3600 else f"{age/60:.0f}m"
|
||||
|
||||
status = "KEEP" if i < args.max_instances else "ZOMBIE"
|
||||
if age > args.max_age:
|
||||
status = "STALE"
|
||||
|
||||
rss_mb = p["rss_kb"] / 1024
|
||||
|
||||
print(f" PID {p['pid']:>6} | {rss_mb:>6.1f}MB | {age_str:>6} | {status} | {p['cmd'][:60]}")
|
||||
|
||||
if status in ("ZOMBIE", "STALE"):
|
||||
zombies.append(p)
|
||||
else:
|
||||
keep.append(p)
|
||||
|
||||
print()
|
||||
print(f"Summary:")
|
||||
print(f" Total processes: {total}")
|
||||
print(f" Keep: {len(keep)}")
|
||||
print(f" Zombies: {len(zombies)}")
|
||||
print(f" Total RSS: {sum(p['rss_kb'] for p in processes) / 1024:.1f} MB")
|
||||
|
||||
if args.status:
|
||||
return 0
|
||||
|
||||
if not zombies:
|
||||
print()
|
||||
print("No zombies to clean up.")
|
||||
return 0
|
||||
|
||||
if not args.kill:
|
||||
print()
|
||||
print("DRY RUN: Add --kill to terminate zombie processes")
|
||||
return 0
|
||||
|
||||
print()
|
||||
print("Killing zombie processes...")
|
||||
killed = 0
|
||||
for p in zombies:
|
||||
if kill_process(p["pid"], force=args.force):
|
||||
killed += 1
|
||||
print(f" Killed PID {p['pid']}")
|
||||
time.sleep(0.1) # Brief pause between kills
|
||||
|
||||
print(f"
|
||||
Killed {killed}/{len(zombies)} processes.")
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user