Compare commits
2 Commits
timmy/flee
...
timmy/flee
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e58a7c225e | ||
|
|
277d21aef6 |
156
fleet/agent_lifecycle.py
Executable file
156
fleet/agent_lifecycle.py
Executable file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
# FLEET-012: Agent Lifecycle Manager
|
||||
# Phase 5: Scale — spawn, train, deploy, retire agents automatically.
|
||||
#
|
||||
# Manages the full lifecycle of AI agents in the fleet:
|
||||
# 1. PROVISION: Clone template, install deps, configure, test
|
||||
# 2. TRAIN: Run initial tasks, measure quality, score
|
||||
# 3. DEPLOY: Add to active rotation, start accepting issues
|
||||
# 4. MONITOR: Track performance, quality, uptime
|
||||
# 5. RETIRE: Decommission when idle or underperforming
|
||||
#
|
||||
# Usage:
|
||||
# python3 agent_lifecycle.py provision <agent_name> <vps_ip> [--model <model>]
|
||||
# python3 agent_lifecycle.py status
|
||||
# python3 agent_lifecycle.py retire <agent_name>
|
||||
# python3 agent_lifecycle.py monitor
|
||||
|
||||
import os, sys, json, subprocess, time
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-agents"))
|
||||
AGENTS_DB = DATA_DIR / "agents.json"
|
||||
LIFECYCLE_LOG = DATA_DIR / "lifecycle.log"
|
||||
|
||||
def ensure_dirs():
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def log(msg, level="INFO"):
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
entry = f"[{ts}] [{level}] {msg}"
|
||||
with open(LIFECYCLE_LOG, "a") as f:
|
||||
f.write(entry + "\n")
|
||||
print(f" {entry}")
|
||||
|
||||
def load_agents():
|
||||
if AGENTS_DB.exists():
|
||||
return json.loads(AGENTS_DB.read_text())
|
||||
return {}
|
||||
|
||||
def save_agents(db):
|
||||
AGENTS_DB.write_text(json.dumps(db, indent=2))
|
||||
|
||||
def status():
|
||||
agents = load_agents()
|
||||
print("\n=== Agent Fleet Status ===")
|
||||
if not agents:
|
||||
print(" No agents registered.")
|
||||
return
|
||||
for name, agent in agents.items():
|
||||
state = agent.get("state", "unknown")
|
||||
vps = agent.get("vps", "unknown")
|
||||
model = agent.get("model", "unknown")
|
||||
score = agent.get("quality_score", "N/A")
|
||||
created = agent.get("created_at", "?")
|
||||
print(f" {name}: state={state}, vps={vps}, model={model}, score={score}, created={created}")
|
||||
if agent.get("last_heartbeat"):
|
||||
last = agent["last_heartbeat"]
|
||||
print(f" Last heartbeat: {last}")
|
||||
|
||||
def provision(name, vps, model="hermes4:14b"):
|
||||
agents = load_agents()
|
||||
if name in agents:
|
||||
print(f" Agent '{name}' already exists (state: {agents[name].get('state')})")
|
||||
return False
|
||||
|
||||
log(f"Provisioning agent '{name}' on {vps} with model {model}")
|
||||
agents[name] = {
|
||||
"name": name,
|
||||
"vps": vps,
|
||||
"model": model,
|
||||
"state": "provisioning",
|
||||
"created_at": datetime.now(timezone.utc).isoformat(),
|
||||
"quality_score": None,
|
||||
"tasks_completed": 0,
|
||||
"tasks_failed": 0,
|
||||
"last_heartbeat": None,
|
||||
"metadata": {"provision_started": datetime.now(timezone.utc).isoformat()}
|
||||
}
|
||||
save_agents(agents)
|
||||
log(f"Agent '{name}' registered. State: provisioning")
|
||||
return True
|
||||
|
||||
def deploy(name):
|
||||
agents = load_agents()
|
||||
if name not in agents:
|
||||
print(f" Agent '{name}' not found")
|
||||
return False
|
||||
|
||||
agents[name]["state"] = "deployed"
|
||||
agents[name]["metadata"]["deployed_at"] = datetime.now(timezone.utc).isoformat()
|
||||
save_agents(agents)
|
||||
log(f"Agent '{name}' deployed and accepting issues")
|
||||
return True
|
||||
|
||||
def retire(name):
|
||||
agents = load_agents()
|
||||
if name not in agents:
|
||||
print(f" Agent '{name}' not found")
|
||||
return False
|
||||
|
||||
agents[name]["state"] = "retired"
|
||||
agents[name]["metadata"]["retired_at"] = datetime.now(timezone.utc).isoformat()
|
||||
save_agents(agents)
|
||||
log(f"Agent '{name}' retired. Completed {agents[name].get('tasks_completed', 0)} tasks.")
|
||||
return True
|
||||
|
||||
def monitor():
|
||||
agents = load_agents()
|
||||
now = time.time()
|
||||
changes = 0
|
||||
for name, agent in agents.items():
|
||||
if agent.get("state") != "deployed":
|
||||
continue
|
||||
last = agent.get("last_heartbeat")
|
||||
if last:
|
||||
try:
|
||||
last_ts = datetime.fromisoformat(last).timestamp()
|
||||
hours_since = (now - last_ts) / 3600
|
||||
if hours_since > 24:
|
||||
log(f"Agent '{name}' no heartbeat for {hours_since:.1f}h")
|
||||
agent["state"] = "idle"
|
||||
agent["metadata"]["idle_since"] = datetime.now(timezone.utc).isoformat()
|
||||
changes += 1
|
||||
elif hours_since > 168: # 7 days
|
||||
log(f"Agent '{name}' idle for 7 days — recommending retirement")
|
||||
agent["metadata"]["retire_recommendation"] = datetime.now(timezone.utc).isoformat()
|
||||
changes += 1
|
||||
except (ValueError, TypeError, OSError):
|
||||
pass
|
||||
if changes > 0:
|
||||
save_agents(agents)
|
||||
log(f"Monitor: {changes} agents state changed")
|
||||
else:
|
||||
log("Monitor: all agents healthy")
|
||||
|
||||
if __name__ == "__main__":
|
||||
ensure_dirs()
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")
|
||||
sys.exit(0)
|
||||
|
||||
cmd = sys.argv[1]
|
||||
if cmd == "provision" and len(sys.argv) >= 4:
|
||||
model = sys.argv[4] if len(sys.argv) >= 5 else "hermes4:14b"
|
||||
provision(sys.argv[2], sys.argv[3], model)
|
||||
elif cmd == "deploy" and len(sys.argv) >= 3:
|
||||
deploy(sys.argv[2])
|
||||
elif cmd == "retire" and len(sys.argv) >= 3:
|
||||
retire(sys.argv[2])
|
||||
elif cmd == "status":
|
||||
status()
|
||||
elif cmd == "monitor":
|
||||
monitor()
|
||||
else:
|
||||
print("Usage: agent_lifecycle.py [provision <name> <vps>|deploy <name>|retire <name>|status|monitor]")
|
||||
272
fleet/auto_restart.py
Executable file
272
fleet/auto_restart.py
Executable file
@@ -0,0 +1,272 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Auto-Restart Agent — Self-healing process monitor for fleet machines.
|
||||
|
||||
Detects dead services and restarts them automatically.
|
||||
Escalates after 3 attempts (prevents restart loops).
|
||||
Logs all actions to ~/.local/timmy/fleet-health/restarts.log
|
||||
Alerts via Telegram if service cannot be recovered.
|
||||
|
||||
Prerequisite: FLEET-006 (health check) must be running to detect failures.
|
||||
|
||||
Usage:
|
||||
python3 auto_restart.py # Run checks now
|
||||
python3 auto_restart.py --daemon # Run continuously (every 60s)
|
||||
python3 auto_restart.py --status # Show restart history
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# === CONFIG ===
|
||||
LOG_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-health"))
|
||||
RESTART_LOG = LOG_DIR / "restarts.log"
|
||||
COOLDOWN_FILE = LOG_DIR / "restart_cooldowns.json"
|
||||
MAX_RETRIES = 3
|
||||
COOLDOWN_PERIOD = 3600 # 1 hour between escalation alerts
|
||||
|
||||
# Services definition: name, check command, restart command
|
||||
# Local services:
|
||||
LOCAL_SERVICES = {
|
||||
"hermes-gateway": {
|
||||
"check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
|
||||
"restart": "cd ~/code-claw && ./restart-gateway.sh 2>/dev/null || launchctl kickstart -k ai.hermes.gateway 2>/dev/null",
|
||||
"critical": True,
|
||||
},
|
||||
"ollama": {
|
||||
"check": "pgrep -f 'ollama serve' > /dev/null 2>/dev/null",
|
||||
"restart": "launchctl kickstart -k com.ollama.ollama 2>/dev/null || /opt/homebrew/bin/brew services restart ollama 2>/dev/null",
|
||||
"critical": False,
|
||||
},
|
||||
"codeclaw-heartbeat": {
|
||||
"check": "launchctl list | grep 'ai.timmy.codeclaw-qwen-heartbeat' > /dev/null 2>/dev/null",
|
||||
"restart": "launchctl kickstart -k ai.timmy.codeclaw-qwen-heartbeat 2>/dev/null",
|
||||
"critical": False,
|
||||
},
|
||||
}
|
||||
|
||||
# VPS services to restart via SSH
|
||||
VPS_SERVICES = {
|
||||
"ezra": {
|
||||
"ip": "143.198.27.163",
|
||||
"user": "root",
|
||||
"services": {
|
||||
"gitea": {
|
||||
"check": "systemctl is-active gitea 2>/dev/null | grep -q active",
|
||||
"restart": "systemctl restart gitea 2>/dev/null",
|
||||
"critical": True,
|
||||
},
|
||||
"nginx": {
|
||||
"check": "systemctl is-active nginx 2>/dev/null | grep -q active",
|
||||
"restart": "systemctl restart nginx 2>/dev/null",
|
||||
"critical": False,
|
||||
},
|
||||
"hermes-agent": {
|
||||
"check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
|
||||
"restart": "cd /root/wizards/ezra/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
|
||||
"critical": True,
|
||||
},
|
||||
},
|
||||
},
|
||||
"allegro": {
|
||||
"ip": "167.99.126.228",
|
||||
"user": "root",
|
||||
"services": {
|
||||
"hermes-agent": {
|
||||
"check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
|
||||
"restart": "cd /root/wizards/allegro/hermes-agent && source .venv/bin/activate && nohup hermes gateway run --replace > /dev/null 2>&1 &",
|
||||
"critical": True,
|
||||
},
|
||||
},
|
||||
},
|
||||
"bezalel": {
|
||||
"ip": "159.203.146.185",
|
||||
"user": "root",
|
||||
"services": {
|
||||
"hermes-agent": {
|
||||
"check": "pgrep -f 'hermes gateway' > /dev/null 2>/dev/null",
|
||||
"restart": "cd /root/wizards/bezalel/hermes/venv/bin/activate && nohup hermes gateway run > /dev/null 2>&1 &",
|
||||
"critical": True,
|
||||
},
|
||||
"evennia": {
|
||||
"check": "pgrep -f 'evennia' > /dev/null 2>/dev/null",
|
||||
"restart": "cd /root/.evennia/timmy_world && evennia restart 2>/dev/null",
|
||||
"critical": False,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
TELEGRAM_TOKEN_FILE = Path(os.path.expanduser("~/.config/telegram/special_bot"))
|
||||
TELEGRAM_CHAT = "-1003664764329"
|
||||
|
||||
|
||||
def send_telegram(message):
|
||||
if not TELEGRAM_TOKEN_FILE.exists():
|
||||
return False
|
||||
token = TELEGRAM_TOKEN_FILE.read_text().strip()
|
||||
url = f"https://api.telegram.org/bot{token}/sendMessage"
|
||||
body = json.dumps({
|
||||
"chat_id": TELEGRAM_CHAT,
|
||||
"text": f"[AUTO-RESTART]\n{message}",
|
||||
}).encode()
|
||||
try:
|
||||
import urllib.request
|
||||
req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"}, method="POST")
|
||||
urllib.request.urlopen(req, timeout=10)
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def get_cooldowns():
|
||||
if COOLDOWN_FILE.exists():
|
||||
try:
|
||||
return json.loads(COOLDOWN_FILE.read_text())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
return {}
|
||||
|
||||
|
||||
def save_cooldowns(data):
|
||||
COOLDOWN_FILE.write_text(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
def check_service(check_cmd, timeout=10):
|
||||
try:
|
||||
proc = subprocess.run(check_cmd, shell=True, capture_output=True, timeout=timeout)
|
||||
return proc.returncode == 0
|
||||
except (subprocess.TimeoutExpired, subprocess.SubprocessError):
|
||||
return False
|
||||
|
||||
|
||||
def restart_service(restart_cmd, timeout=30):
|
||||
try:
|
||||
proc = subprocess.run(restart_cmd, shell=True, capture_output=True, timeout=timeout)
|
||||
return proc.returncode == 0
|
||||
except (subprocess.TimeoutExpired, subprocess.SubprocessError) as e:
|
||||
return False
|
||||
|
||||
|
||||
def try_restart_via_ssh(name, host_config, service_name):
|
||||
ip = host_config["ip"]
|
||||
user = host_config["user"]
|
||||
service = host_config["services"][service_name]
|
||||
|
||||
restart_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 {user}@{ip} "{service["restart"]}"'
|
||||
return restart_service(restart_cmd, timeout=30)
|
||||
|
||||
|
||||
def log_restart(service_name, machine, attempt, success):
|
||||
ts = datetime.now(timezone.utc).isoformat()
|
||||
status = "SUCCESS" if success else "FAILED"
|
||||
log_entry = f"{ts} [{status}] {machine}/{service_name} (attempt {attempt})\n"
|
||||
|
||||
RESTART_LOG.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(RESTART_LOG, "a") as f:
|
||||
f.write(log_entry)
|
||||
|
||||
print(f" [{status}] {machine}/{service_name} - attempt {attempt}")
|
||||
|
||||
|
||||
def check_and_restart():
|
||||
"""Run all restart checks."""
|
||||
results = []
|
||||
cooldowns = get_cooldowns()
|
||||
now = time.time()
|
||||
|
||||
# Check local services
|
||||
for name, service in LOCAL_SERVICES.items():
|
||||
if not check_service(service["check"]):
|
||||
cooldown_key = f"local/{name}"
|
||||
retries = cooldowns.get(cooldown_key, {"count": 0, "last": 0}).get("count", 0)
|
||||
|
||||
if retries >= MAX_RETRIES:
|
||||
last = cooldowns.get(cooldown_key, {}).get("last", 0)
|
||||
if now - last < COOLDOWN_PERIOD and service["critical"]:
|
||||
send_telegram(f"CRITICAL: local/{name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
|
||||
cooldowns[cooldown_key] = {"count": 0, "last": now}
|
||||
save_cooldowns(cooldowns)
|
||||
continue
|
||||
|
||||
success = restart_service(service["restart"])
|
||||
log_restart(name, "local", retries + 1, success)
|
||||
|
||||
cooldowns[cooldown_key] = {"count": retries + 1 if not success else 0, "last": now}
|
||||
save_cooldowns(cooldowns)
|
||||
if success:
|
||||
# Verify it actually started
|
||||
time.sleep(3)
|
||||
if check_service(service["check"]):
|
||||
print(f" VERIFIED: local/{name} is running")
|
||||
else:
|
||||
print(f" WARNING: local/{name} restart command returned success but process not detected")
|
||||
|
||||
# Check VPS services
|
||||
for host, host_config in VPS_SERVICES.items():
|
||||
for service_name, service in host_config["services"].items():
|
||||
check_cmd = f'ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 {host_config["user"]}@{host_config["ip"]} "{service["check"]}"'
|
||||
if not check_service(check_cmd):
|
||||
cooldown_key = f"{host}/{service_name}"
|
||||
retries = cooldowns.get(cooldown_key, {"count": 0, "last": 0}).get("count", 0)
|
||||
|
||||
if retries >= MAX_RETRIES:
|
||||
last = cooldowns.get(cooldown_key, {}).get("last", 0)
|
||||
if now - last < COOLDOWN_PERIOD and service["critical"]:
|
||||
send_telegram(f"CRITICAL: {host}/{service_name} failed {MAX_RETRIES} restart attempts. Needs human intervention.")
|
||||
cooldowns[cooldown_key] = {"count": 0, "last": now}
|
||||
save_cooldowns(cooldowns)
|
||||
continue
|
||||
|
||||
success = try_restart_via_ssh(host, host_config, service_name)
|
||||
log_restart(service_name, host, retries + 1, success)
|
||||
|
||||
cooldowns[cooldown_key] = {"count": retries + 1 if not success else 0, "last": now}
|
||||
save_cooldowns(cooldowns)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def daemon_mode():
|
||||
"""Run continuously every 60 seconds."""
|
||||
print("Auto-restart agent running in daemon mode (60s interval)")
|
||||
print(f"Monitoring {len(LOCAL_SERVICES)} local + {sum(len(h['services']) for h in VPS_SERVICES.values())} remote services")
|
||||
print(f"Max retries per cycle: {MAX_RETRIES}")
|
||||
print(f"Cooldown after max retries: {COOLDOWN_PERIOD}s")
|
||||
while True:
|
||||
check_and_restart()
|
||||
time.sleep(60)
|
||||
|
||||
|
||||
def show_status():
|
||||
"""Show restart history and cooldowns."""
|
||||
cooldowns = get_cooldowns()
|
||||
print("=== Restart Cooldowns ===")
|
||||
for key, data in sorted(cooldowns.items()):
|
||||
count = data.get("count", 0)
|
||||
if count > 0:
|
||||
print(f" {key}: {count} failures, last at {datetime.fromtimestamp(data.get('last',0), tz=timezone.utc).strftime('%H:%M')}")
|
||||
|
||||
print("\n=== Restart Log (last 20) ===")
|
||||
if RESTART_LOG.exists():
|
||||
lines = RESTART_LOG.read_text().strip().split("\n")
|
||||
for line in lines[-20:]:
|
||||
print(f" {line}")
|
||||
else:
|
||||
print(" No restarts logged yet.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if len(sys.argv) > 1 and sys.argv[1] == "--daemon":
|
||||
daemon_mode()
|
||||
elif len(sys.argv) > 1 and sys.argv[1] == "--status":
|
||||
show_status()
|
||||
else:
|
||||
check_and_restart()
|
||||
142
fleet/delegation.py
Executable file
142
fleet/delegation.py
Executable file
@@ -0,0 +1,142 @@
|
||||
#!/usr/bin/env python3
|
||||
# Cross-Agent Task Delegation - The Timmy Foundation
|
||||
# Phase 3: Orchestration capability.
|
||||
# Agents create issues, assign to other agents, review PRs automatically.
|
||||
|
||||
import os, sys, json, time, urllib.request
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
|
||||
TOKEN_FILE = Path(os.path.expanduser("~/.config/gitea/timmy-token"))
|
||||
ALT_TOKEN = Path(os.path.expanduser("~/.config/gitea/token"))
|
||||
DATA_DIR = Path(os.path.expanduser("~/.local/timmy/fleet-resources"))
|
||||
DELEGATION_LOG = DATA_DIR / "delegation.log"
|
||||
|
||||
AGENTS = {
|
||||
"claw-code": {"models": ["qwen3.6-plus:free"], "caps": ["small-patches","config","docs","repo-hygiene"], "max": 2, "active": True},
|
||||
"gemini": {"models": ["gemini-2.5-flash"], "caps": ["research","heavy-impl","architecture","debugging"], "max": 5, "active": True},
|
||||
"ezra": {"models": ["hermes4:14b","local-ollama"], "caps": ["contracting","formalization","ops","vps"], "max": 3, "active": True},
|
||||
"bezalel": {"models": ["local-llm"], "caps": ["evennia","art","creative","visualization"], "max": 3, "active": True},
|
||||
"timmy": {"models": ["qwen3.6-plus:free","hermes4:14b","local-ollama"], "caps": ["orchestration","review","deploy","fleet"], "max": 5, "active": True},
|
||||
}
|
||||
|
||||
MONITORED_REPOS = [
|
||||
"Timmy_Foundation/timmy-home",
|
||||
"Timmy_Foundation/timmy-config",
|
||||
"Timmy_Foundation/the-nexus",
|
||||
"Timmy_Foundation/hermes-agent",
|
||||
]
|
||||
|
||||
# Heuristic keyword matching
|
||||
KEYWORDS = {
|
||||
"claw-code": ["patch","typo","config","gitignore","docs update","readme","cleanup","format"],
|
||||
"gemini": ["research","investigate","analyze","compare","benchmark","survey","evaluate"],
|
||||
"ezra": ["vps","ssh","deploy","infrastructure","server","cron","resurrection","provision"],
|
||||
"bezalel": ["evennia","art","creative","music","visualization","diagram"],
|
||||
"timmy": ["orchestrate","review","merge","fleet","pipeline","health","monitor"],
|
||||
}
|
||||
|
||||
def get_token():
|
||||
if TOKEN_FILE.exists(): return TOKEN_FILE.read_text().strip()
|
||||
if ALT_TOKEN.exists(): return ALT_TOKEN.read_text().strip()
|
||||
return ""
|
||||
|
||||
def api(path, method="GET", data=None):
|
||||
token = get_token()
|
||||
url = f"{GITEA_BASE}{path}"
|
||||
headers = {"Authorization": f"token {token}"}
|
||||
body = json.dumps(data).encode() if data else None
|
||||
if data: headers["Content-Type"] = "application/json"
|
||||
req = urllib.request.Request(url, data=body, headers=headers, method=method)
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=15)
|
||||
raw = resp.read().decode()
|
||||
return json.loads(raw) if raw.strip() else {}
|
||||
except urllib.error.HTTPError as e:
|
||||
err = e.read().decode()
|
||||
print(f" API error {e.code}: {err[:200]}")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f" API error: {e}")
|
||||
return None
|
||||
|
||||
def log_delegation(msg, level="INFO"):
|
||||
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
entry = f"[{ts}] [{level}] {msg}"
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
with open(DELEGATION_LOG, "a") as f: f.write(entry + "\n")
|
||||
print(f" {entry}")
|
||||
|
||||
def suggest_agent(title, body):
|
||||
text = (title + " " + body).lower()
|
||||
for agent, keywords in KEYWORDS.items():
|
||||
if any(kw in text for kw in keywords):
|
||||
return agent, f"keywords matched for {agent}"
|
||||
return None, None
|
||||
|
||||
def assign_issue(repo, issue_num, agent):
|
||||
result = api(f"/repos/{repo}/issues/{issue_num}", method="PATCH",
|
||||
data={"assignees": {"operation": "set", "usernames": [agent]}})
|
||||
if result:
|
||||
log_delegation(f"Assigned {repo}#{issue_num} to {agent}")
|
||||
comment_on_issue(repo, issue_num, f"[AUTO-ASSIGN] Assigned to {agent}.")
|
||||
return result
|
||||
|
||||
def comment_on_issue(repo, issue_num, body):
|
||||
return api(f"/repos/{repo}/issues/{issue_num}/comments", method="POST", data={"body": body})
|
||||
|
||||
def get_my_issues(agent):
|
||||
issues = []
|
||||
for repo in MONITORED_REPOS:
|
||||
repo_issues = api(f"/repos/{repo}/issues?state=open&limit=50")
|
||||
if repo_issues:
|
||||
for i in repo_issues:
|
||||
for a in (i.get("assignees") or []):
|
||||
if a.get("login") == agent:
|
||||
issues.append({"repo": repo, "issue": i})
|
||||
return issues
|
||||
|
||||
def run_cycle():
|
||||
log_delegation("Starting delegation cycle")
|
||||
count = 0
|
||||
for repo in MONITORED_REPOS:
|
||||
issues = api(f"/repos/{repo}/issues?state=open&limit=50")
|
||||
if not issues: continue
|
||||
for issue in issues:
|
||||
if issue.get("assignees"): continue
|
||||
title = issue.get("title","")
|
||||
body = issue.get("body","")
|
||||
if any(w in title.lower() for w in ["epic","discussion","question"]): continue
|
||||
agent, reason = suggest_agent(title, body)
|
||||
if agent:
|
||||
if assign_issue(repo, issue["number"], agent): count += 1
|
||||
log_delegation(f"Cycle complete: {count} new assignments")
|
||||
return count
|
||||
|
||||
def show_status():
|
||||
print("\n=== Delegation Status ===")
|
||||
for name, info in AGENTS.items():
|
||||
issues = get_my_issues(name)
|
||||
status = "ONLINE" if info["active"] else "OFFLINE"
|
||||
print(f" {name}: {len(issues)} assigned [{status}]")
|
||||
for iss in issues[:3]:
|
||||
print(f" - {iss['repo'].split('/')[-1]}#{iss['issue']['number']}: {iss['issue']['title'][:60]}")
|
||||
if len(issues) > 3:
|
||||
print(f" ... +{len(issues)-3} more")
|
||||
|
||||
if __name__ == "__main__":
|
||||
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
||||
if len(sys.argv) > 1:
|
||||
cmd = sys.argv[1]
|
||||
if cmd == "status": show_status()
|
||||
elif cmd == "run":
|
||||
run_cycle()
|
||||
show_status()
|
||||
elif cmd == "assign" and len(sys.argv) >= 5:
|
||||
assign_issue(sys.argv[3], int(sys.argv[2]), sys.argv[4])
|
||||
else:
|
||||
print("Usage: delegation.py [run|status|assign <issue_num> <repo> <agent>]")
|
||||
else:
|
||||
run_cycle()
|
||||
show_status()
|
||||
171
fleet/model-fallback.sh
Executable file
171
fleet/model-fallback.sh
Executable file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env bash
|
||||
# FLEET-011: Local Model Pipeline and Fallback Chain
|
||||
# Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
|
||||
#
|
||||
# Usage:
|
||||
# ./model-fallback.sh # Show current model chain status
|
||||
# ./model-fallback.sh list # List all local models
|
||||
# ./model-fallback.sh test "Hello" # Test the full fallback chain
|
||||
# ./model-fallback.sh chat # Interactive chat mode
|
||||
# ./model-fallback.sh install # Install default model chain
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# === CONFIG ===
|
||||
CHAIN_FILE="$HOME/.local/timmy/fleet-resources/model-chain.json"
|
||||
LOG_DIR="$HOME/.local/timmy/fleet-health"
|
||||
OLLAMA_URL="http://localhost:11434"
|
||||
|
||||
# Default chain (best quality first, fallback to smallest that runs)
|
||||
DEFAULT_CHAIN=$(cat << 'EOF'
|
||||
{
|
||||
"chain": [
|
||||
{"name": "hermes4:14b", "provider": "ollama", "max_tokens": 4096, "purpose": "primary"},
|
||||
{"name": "qwen2.5:7b", "provider": "ollama", "max_tokens": 4096, "purpose": "fallback"},
|
||||
{"name": "phi3:3.8b", "provider": "ollama", "max_tokens": 2048, "purpose": "emergency"},
|
||||
{"name": "gemma2:2b", "provider": "ollama", "max_tokens": 2048, "purpose": "minimal"}
|
||||
]
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
load_chain() {
|
||||
if [ -f "$CHAIN_FILE" ]; then
|
||||
cat "$CHAIN_FILE"
|
||||
else
|
||||
echo "$DEFAULT_CHAIN"
|
||||
fi
|
||||
}
|
||||
|
||||
save_chain() {
|
||||
echo "$1" > "$CHAIN_FILE"
|
||||
echo "Model chain saved to $CHAIN_FILE"
|
||||
}
|
||||
|
||||
install_chain() {
|
||||
echo "Installing default model chain..."
|
||||
echo "$DEFAULT_CHAIN" > "$CHAIN_FILE"
|
||||
|
||||
# Extract model names and install via Ollama
|
||||
echo "$DEFAULT_CHAIN" | python3 -c "
|
||||
import json,sys
|
||||
for m in json.load(sys.stdin)['chain']:
|
||||
print(m['name'])
|
||||
" | while read model; do
|
||||
echo " Installing $model..."
|
||||
if ollama list 2>/dev/null | grep -q "$model"; then
|
||||
echo " $model already installed"
|
||||
else
|
||||
ollama pull "$model" 2>&1 | tail -1
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
list_models() {
|
||||
echo "=== Local Models (Ollama) ==="
|
||||
ollama list 2>/dev/null || echo "Ollama not running or not installed"
|
||||
|
||||
echo ""
|
||||
echo "=== Active Fallback Chain ==="
|
||||
load_chain | python3 -c "
|
||||
import json,sys
|
||||
data = json.load(sys.stdin)
|
||||
print(f'{\"Model\":<25} {\"Purpose\":<12} {\"Max tokens\":>10}')
|
||||
print('-' * 50)
|
||||
for m in data['chain']:
|
||||
print(f'{m[\"name\"]:<25} {m[\"purpose\"]:<12} {m[\"max_tokens\"]:>10}')
|
||||
"
|
||||
}
|
||||
|
||||
status() {
|
||||
echo "=== Model Pipeline Status ==="
|
||||
|
||||
# Check Ollama
|
||||
if curl -s "$OLLAMA_URL/api/tags" >/dev/null 2>&1; then
|
||||
echo " Ollama: RUNNING at $OLLAMA_URL"
|
||||
model_count=$(curl -s "$OLLAMA_URL/api/tags" | python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
|
||||
echo " Local models: $model_count"
|
||||
else
|
||||
echo " Ollama: DOWN - no local inference available"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Fallback Chain ==="
|
||||
list_models
|
||||
}
|
||||
|
||||
test_chain() {
|
||||
local prompt="$1"
|
||||
|
||||
echo "Testing fallback chain with prompt: \"$prompt\""
|
||||
echo ""
|
||||
|
||||
load_chain | python3 -c "
|
||||
import json,sys,urllib.request,subprocess
|
||||
|
||||
data = json.load(sys.stdin)
|
||||
models = data['chain']
|
||||
prompt = '$(echo "$prompt" | sed "s/'/\\\\'/g")'
|
||||
|
||||
for m in models:
|
||||
name = m['name']
|
||||
print(f' Testing {name}...', end=' ')
|
||||
try:
|
||||
body = json.dumps({'model': name, 'prompt': '$prompt', 'stream': False}).encode()
|
||||
req = urllib.request.Request('http://localhost:11434/api/generate', data=body,
|
||||
headers={'Content-Type': 'application/json'})
|
||||
resp = urllib.request.urlopen(req, timeout=30)
|
||||
result = json.loads(resp.read())
|
||||
print(f'OK — \"{result.get(\"response\", \"\")[:80]}\"')
|
||||
print(f' Chain works! Primary model ({name}) is serving.')
|
||||
sys.exit(0)
|
||||
except Exception as e:
|
||||
print(f'FAILED — {str(e)[:60]}')
|
||||
|
||||
print('All models failed. No local inference available.')
|
||||
"
|
||||
}
|
||||
|
||||
chat() {
|
||||
echo "=== Beacon Chat Mode ==="
|
||||
echo "Type 'quit' to exit. Using local model chain."
|
||||
echo ""
|
||||
|
||||
load_chain | python3 -c "
|
||||
import json,sys,urllib.request
|
||||
|
||||
data = json.load(sys.stdin)
|
||||
models = data['chain']
|
||||
|
||||
while True:
|
||||
try:
|
||||
prompt = input('> ')
|
||||
except EOFError:
|
||||
break
|
||||
if prompt.lower() in ('quit', 'exit'):
|
||||
break
|
||||
if not prompt.strip():
|
||||
continue
|
||||
|
||||
print('Thinking...')
|
||||
body = json.dumps({'model': models[0]['name'], 'prompt': prompt, 'stream': False}).encode()
|
||||
req = urllib.request.Request('http://localhost:11434/api/generate', data=body,
|
||||
headers={'Content-Type': 'application/json'})
|
||||
try:
|
||||
resp = urllib.request.urlopen(req, timeout=120)
|
||||
result = json.loads(resp.read())
|
||||
print(result.get('response', '').strip())
|
||||
except Exception as e:
|
||||
print(f'Model error: {e}')
|
||||
print('Trying next model in chain...')
|
||||
"
|
||||
}
|
||||
|
||||
case "${1:-status}" in
|
||||
install) install_chain ;;
|
||||
list) list_models ;;
|
||||
test) test_chain "${2:-Hello, are you there?}" ;;
|
||||
chat) chat ;;
|
||||
status) status ;;
|
||||
*) echo "Usage: $0 [install|list|test|chat|status]" ;;
|
||||
esac
|
||||
Reference in New Issue
Block a user