Compare commits

...

1 Commits

Author SHA1 Message Date
Alexander Whitestone
e58a7c225e feat: FLEET-010/011/012 — Phase 3+5 fleet capabilities
FLEET-010: Cross-Agent Task Delegation Protocol
- Auto-assigns unassigned issues to agents based on keyword matching
- Supports all fleet agents: claw-code, gemini, ezra, bezalel, timmy
- Delegation logging cycle

FLEET-011: Local Model Pipeline and Fallback Chain
- 4-model fallback chain (hermes4:14b -> qwen2.5:7b -> phi3:3.8b -> gemma2:2b)
- Tests full chain with live inference
- Interactive chat mode using local models only

FLEET-012: Agent Lifecycle Manager
- Full lifecycle: provision -> deploy -> monitor -> retire
- Heartbeat detection, idle timeout, retirement recommendations
- Agent quality tracking

Fixes timmy-home#563, #564, #565
2026-04-07 12:39:17 -04:00
3 changed files with 469 additions and 0 deletions

156
fleet/agent_lifecycle.py Executable file
View File

@@ -0,0 +1,156 @@
#!/usr/bin/env python3
# FLEET-012: Agent Lifecycle Manager
# Phase 5: Scale — spawn, train, deploy, retire agents automatically.
#
# Manages the full lifecycle of AI agents in the fleet:
# 1. PROVISION: Clone template, install deps, configure, test
# 2. TRAIN: Run initial tasks, measure quality, score
# 3. DEPLOY: Add to active rotation, start accepting issues
# 4. MONITOR: Track performance, quality, uptime
# 5. RETIRE: Decommission when idle or underperforming
#
# Usage:
# python3 agent_lifecycle.py provision <agent_name> <vps_ip> [--model <model>]
# python3 agent_lifecycle.py deploy <agent_name>
# python3 agent_lifecycle.py status
# python3 agent_lifecycle.py retire <agent_name>
# python3 agent_lifecycle.py monitor
import os, sys, json, subprocess, time
from datetime import datetime, timezone
from pathlib import Path
# Per-user directory that holds the agent registry and the lifecycle log.
DATA_DIR = Path(os.path.expanduser("~")) / ".local" / "timmy" / "fleet-agents"
# JSON file mapping agent name -> agent record.
AGENTS_DB = DATA_DIR.joinpath("agents.json")
# Append-only text log written by log().
LIFECYCLE_LOG = DATA_DIR.joinpath("lifecycle.log")
def ensure_dirs():
    """Create DATA_DIR, including missing parents; a no-op if it already exists."""
    DATA_DIR.mkdir(exist_ok=True, parents=True)
def log(msg, level="INFO"):
    """Append a UTC-timestamped entry to LIFECYCLE_LOG and echo it to stdout.

    Args:
        msg: Text of the log entry.
        level: Severity label placed in brackets before the message.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{stamp}] [{level}] {msg}"
    with open(LIFECYCLE_LOG, "a") as handle:
        # print() appends the trailing newline the log format relies on.
        print(line, file=handle)
    print(f" {line}")
def load_agents():
    """Return the agent registry parsed from AGENTS_DB.

    Returns:
        The decoded JSON content of AGENTS_DB, or an empty dict when the
        file does not exist yet.
    """
    if not AGENTS_DB.exists():
        return {}
    raw = AGENTS_DB.read_text()
    return json.loads(raw)
def save_agents(db):
    """Serialize *db* as 2-space-indented JSON and overwrite AGENTS_DB with it."""
    serialized = json.dumps(db, indent=2)
    AGENTS_DB.write_text(serialized)
def status():
    """Print a one-line summary per registered agent, plus its last heartbeat if set."""
    registry = load_agents()
    print("\n=== Agent Fleet Status ===")
    if not registry:
        print(" No agents registered.")
        return
    for name, record in registry.items():
        # These three fields share the same "unknown" placeholder.
        state, vps, model = (
            record.get(field, "unknown") for field in ("state", "vps", "model")
        )
        score = record.get("quality_score", "N/A")
        created = record.get("created_at", "?")
        print(f" {name}: state={state}, vps={vps}, model={model}, score={score}, created={created}")
        last = record.get("last_heartbeat")
        if last:
            print(f" Last heartbeat: {last}")
def provision(name, vps, model="hermes4:14b"):
    """Register a new agent record in the "provisioning" state.

    Args:
        name: Registry key for the agent.
        vps: Host the agent is provisioned on (stored as given).
        model: Model identifier stored on the record.

    Returns:
        True when the agent was registered, False when *name* already exists.
    """
    registry = load_agents()
    if name in registry:
        current_state = registry[name].get('state')
        print(f" Agent '{name}' already exists (state: {current_state})")
        return False

    log(f"Provisioning agent '{name}' on {vps} with model {model}")
    registry[name] = dict(
        name=name,
        vps=vps,
        model=model,
        state="provisioning",
        created_at=datetime.now(timezone.utc).isoformat(),
        quality_score=None,
        tasks_completed=0,
        tasks_failed=0,
        last_heartbeat=None,
        metadata=dict(provision_started=datetime.now(timezone.utc).isoformat()),
    )
    save_agents(registry)
    log(f"Agent '{name}' registered. State: provisioning")
    return True
def deploy(name):
    """Mark agent *name* as "deployed" and stamp the deployment time.

    Returns:
        True on success, False when the agent is not in the registry.
    """
    registry = load_agents()
    if name not in registry:
        print(f" Agent '{name}' not found")
        return False

    record = registry[name]
    record["state"] = "deployed"
    record["metadata"]["deployed_at"] = datetime.now(timezone.utc).isoformat()
    save_agents(registry)
    log(f"Agent '{name}' deployed and accepting issues")
    return True
def retire(name):
    """Mark agent *name* as "retired" and stamp the retirement time.

    Returns:
        True on success, False when the agent is not in the registry.
    """
    registry = load_agents()
    if name not in registry:
        print(f" Agent '{name}' not found")
        return False

    record = registry[name]
    record["state"] = "retired"
    record["metadata"]["retired_at"] = datetime.now(timezone.utc).isoformat()
    save_agents(registry)
    completed = record.get('tasks_completed', 0)
    log(f"Agent '{name}' retired. Completed {completed} tasks.")
    return True
def monitor():
    """Check heartbeats and update agent state accordingly.

    For every agent in the "deployed" or "idle" state that has a
    ``last_heartbeat``:

    * a deployed agent silent for more than 24 hours is moved to "idle" and
      ``metadata["idle_since"]`` is stamped;
    * an agent silent for more than 168 hours (7 days) gets a one-time
      ``metadata["retire_recommendation"]`` stamp.

    The registry is saved only when at least one agent changed. Agents
    without a heartbeat, or with one that cannot be parsed, are left as-is
    (the latter is logged at WARN level instead of being silently ignored).
    """
    idle_after_hours = 24
    retire_after_hours = 168  # 7 days

    agents = load_agents()
    now = time.time()
    changes = 0
    for name, agent in agents.items():
        state = agent.get("state")
        # Idle agents stay in scope: otherwise an agent marked idle on one run
        # could never reach the 7-day retirement check on a later run.
        if state not in ("deployed", "idle"):
            continue
        last = agent.get("last_heartbeat")
        if not last:
            continue
        try:
            last_ts = datetime.fromisoformat(last).timestamp()
        except (ValueError, TypeError, OSError) as exc:
            log(f"Agent '{name}' has unparseable last_heartbeat {last!r}: {exc}", "WARN")
            continue

        hours_since = (now - last_ts) / 3600
        # Records written by other tools may lack "metadata"; create it on demand.
        metadata = agent.setdefault("metadata", {})
        stamp = datetime.now(timezone.utc).isoformat()
        changed = False

        if state == "deployed" and hours_since > idle_after_hours:
            log(f"Agent '{name}' no heartbeat for {hours_since:.1f}h")
            agent["state"] = "idle"
            metadata["idle_since"] = stamp
            changed = True

        # Checked independently of the 24h rule: the original `elif` placed
        # after `> 24` could never be reached.
        if hours_since > retire_after_hours and "retire_recommendation" not in metadata:
            log(f"Agent '{name}' idle for 7 days — recommending retirement")
            metadata["retire_recommendation"] = stamp
            changed = True

        if changed:
            changes += 1

    if changes > 0:
        save_agents(agents)
        log(f"Monitor: {changes} agents state changed")
    else:
        log("Monitor: all agents healthy")
if __name__ == "__main__":
    # CLI entry point: agent_lifecycle.py <command> [args...]
    usage = (
        "Usage: agent_lifecycle.py [provision <name> <vps> [--model <model>]"
        "|deploy <name>|retire <name>|status|monitor]"
    )
    ensure_dirs()
    if len(sys.argv) < 2:
        print("Usage: agent_lifecycle.py [provision|deploy|retire|status|monitor]")
        sys.exit(0)
    cmd = sys.argv[1]
    args = sys.argv[2:]
    if cmd == "provision" and len(args) >= 2:
        # Accept both the documented `--model <model>` flag and the legacy
        # bare positional model; previously the literal "--model" was stored
        # as the model name.
        extra = args[2:]
        if extra and extra[0] == "--model":
            extra = extra[1:]
            if not extra:
                print(usage)
                sys.exit(1)
        model = extra[0] if extra else "hermes4:14b"
        provision(args[0], args[1], model)
    elif cmd == "deploy" and len(args) >= 1:
        deploy(args[0])
    elif cmd == "retire" and len(args) >= 1:
        retire(args[0])
    elif cmd == "status":
        status()
    elif cmd == "monitor":
        monitor()
    else:
        print(usage)

142
fleet/delegation.py Executable file
View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
# Cross-Agent Task Delegation - The Timmy Foundation
# Phase 3: Orchestration capability.
# Agents create issues, assign to other agents, review PRs automatically.
import os, sys, json, time, urllib.request
from datetime import datetime, timezone
from pathlib import Path
# Base URL every api() request path is appended to.
GITEA_BASE = "https://forge.alexanderwhitestone.com/api/v1"
# Token files; get_token() checks TOKEN_FILE first, then ALT_TOKEN.
TOKEN_FILE = Path(os.path.expanduser("~")) / ".config" / "gitea" / "timmy-token"
ALT_TOKEN = TOKEN_FILE.with_name("token")
# Directory holding the delegation log.
DATA_DIR = Path(os.path.expanduser("~")) / ".local" / "timmy" / "fleet-resources"
DELEGATION_LOG = DATA_DIR.joinpath("delegation.log")
# Static roster of fleet agents: model names, capability tags, a "max" value
# and an "active" flag (show_status renders the flag as ONLINE/OFFLINE).
AGENTS = {
    "claw-code": {
        "models": ["qwen3.6-plus:free"],
        "caps": ["small-patches", "config", "docs", "repo-hygiene"],
        "max": 2,
        "active": True,
    },
    "gemini": {
        "models": ["gemini-2.5-flash"],
        "caps": ["research", "heavy-impl", "architecture", "debugging"],
        "max": 5,
        "active": True,
    },
    "ezra": {
        "models": ["hermes4:14b", "local-ollama"],
        "caps": ["contracting", "formalization", "ops", "vps"],
        "max": 3,
        "active": True,
    },
    "bezalel": {
        "models": ["local-llm"],
        "caps": ["evennia", "art", "creative", "visualization"],
        "max": 3,
        "active": True,
    },
    "timmy": {
        "models": ["qwen3.6-plus:free", "hermes4:14b", "local-ollama"],
        "caps": ["orchestration", "review", "deploy", "fleet"],
        "max": 5,
        "active": True,
    },
}

# Repositories scanned for open issues, as "<owner>/<repo>".
MONITORED_REPOS = [
    f"Timmy_Foundation/{repo_name}"
    for repo_name in ("timmy-home", "timmy-config", "the-nexus", "hermes-agent")
]

# Heuristic keyword matching: suggest_agent() returns the first agent (in this
# insertion order) with any keyword found as a substring of the issue text.
KEYWORDS = {
    "claw-code": [
        "patch", "typo", "config", "gitignore",
        "docs update", "readme", "cleanup", "format",
    ],
    "gemini": [
        "research", "investigate", "analyze", "compare",
        "benchmark", "survey", "evaluate",
    ],
    "ezra": [
        "vps", "ssh", "deploy", "infrastructure",
        "server", "cron", "resurrection", "provision",
    ],
    "bezalel": [
        "evennia", "art", "creative", "music", "visualization", "diagram",
    ],
    "timmy": [
        "orchestrate", "review", "merge", "fleet",
        "pipeline", "health", "monitor",
    ],
}
def get_token():
    """Return the stripped content of the first existing token file, or "" if none exists."""
    for candidate in (TOKEN_FILE, ALT_TOKEN):
        if candidate.exists():
            return candidate.read_text().strip()
    return ""
def api(path, method="GET", data=None):
    """Send a token-authenticated JSON request to the Gitea API.

    Args:
        path: Path (and optional query string) appended to GITEA_BASE.
        method: HTTP method name.
        data: JSON-serializable payload, or None for a request without body.

    Returns:
        The decoded JSON response, ``{}`` when the response body is empty,
        or None when the request failed (the error is printed, not raised).
    """
    # Imported explicitly: the except clause below names urllib.error, which
    # the original relied on urllib.request importing as a side effect.
    import urllib.error

    token = get_token()
    url = f"{GITEA_BASE}{path}"
    headers = {"Authorization": f"token {token}"}
    body = None
    # `is not None` so that an intentionally empty payload ({}) is still sent.
    if data is not None:
        body = json.dumps(data).encode()
        headers["Content-Type"] = "application/json"
    req = urllib.request.Request(url, data=body, headers=headers, method=method)
    try:
        # Context manager closes the connection even if reading/decoding fails.
        with urllib.request.urlopen(req, timeout=15) as resp:
            raw = resp.read().decode()
        return json.loads(raw) if raw.strip() else {}
    except urllib.error.HTTPError as e:
        err = e.read().decode()
        print(f" API error {e.code}: {err[:200]}")
        return None
    except Exception as e:
        # Best-effort boundary: network and decode failures are reported to
        # the caller as None rather than raised.
        print(f" API error: {e}")
        return None
def log_delegation(msg, level="INFO"):
    """Append a UTC-timestamped entry to DELEGATION_LOG and echo it to stdout.

    DATA_DIR is created on demand before the log file is opened.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{stamp}] [{level}] {msg}"
    DATA_DIR.mkdir(exist_ok=True, parents=True)
    with open(DELEGATION_LOG, "a") as handle:
        # print() appends the trailing newline.
        print(line, file=handle)
    print(f" {line}")
def suggest_agent(title, body):
    """Pick an agent for an issue by keyword matching.

    Args:
        title: Issue title; None is treated as an empty string.
        body: Issue body; None is treated as an empty string.

    Returns:
        ``(agent_name, reason)`` for the first agent in KEYWORDS order that
        has any keyword occurring in the lower-cased text, or
        ``(None, None)`` when nothing matches.
    """
    # A missing title/body may arrive as None rather than ""; concatenating
    # None would raise TypeError.
    text = f"{title or ''} {body or ''}".lower()
    # NOTE(review): matching is by plain substring, so short keywords can hit
    # inside longer words (e.g. "art" in "start") — confirm this is intended.
    for agent, keywords in KEYWORDS.items():
        if any(kw in text for kw in keywords):
            return agent, f"keywords matched for {agent}"
    return None, None
def assign_issue(repo, issue_num, agent):
    """Assign an issue to *agent* and leave an audit comment on it.

    Args:
        repo: Repository as "<owner>/<repo>".
        issue_num: Issue index within the repository.
        agent: Username to assign.

    Returns:
        The api() result of the PATCH request (falsy when the request failed
        or returned an empty body). The log entry and the comment are only
        written when it is truthy.
    """
    # Gitea's EditIssueOption takes "assignees" as a plain list of usernames;
    # the previous {"operation": ..., "usernames": ...} object is not part of
    # that schema.
    result = api(
        f"/repos/{repo}/issues/{issue_num}",
        method="PATCH",
        data={"assignees": [agent]},
    )
    if result:
        log_delegation(f"Assigned {repo}#{issue_num} to {agent}")
        comment_on_issue(repo, issue_num, f"[AUTO-ASSIGN] Assigned to {agent}.")
    return result
def comment_on_issue(repo, issue_num, body):
    """POST *body* as a new comment on the given issue and return the api() result."""
    endpoint = f"/repos/{repo}/issues/{issue_num}/comments"
    payload = {"body": body}
    return api(endpoint, method="POST", data=payload)
def get_my_issues(agent):
    """Collect open issues across MONITORED_REPOS that are assigned to *agent*.

    Returns:
        A list of ``{"repo": <repo>, "issue": <issue dict>}`` entries, one per
        matching assignee entry. Only the first 50 open issues of each
        repository are requested.
    """
    matches = []
    for repo in MONITORED_REPOS:
        fetched = api(f"/repos/{repo}/issues?state=open&limit=50")
        if not fetched:
            continue
        matches.extend(
            {"repo": repo, "issue": item}
            for item in fetched
            for assignee in (item.get("assignees") or [])
            if assignee.get("login") == agent
        )
    return matches
def run_cycle():
    """Run one delegation pass over all monitored repositories.

    Every open, unassigned issue whose title does not contain "epic",
    "discussion" or "question" is passed to suggest_agent(); when an agent
    is suggested the issue is assigned to it.

    Returns:
        The number of issues successfully assigned in this cycle.
    """
    log_delegation("Starting delegation cycle")
    skip_words = ("epic", "discussion", "question")
    count = 0
    for repo in MONITORED_REPOS:
        issues = api(f"/repos/{repo}/issues?state=open&limit=50")
        if not issues:
            continue
        for issue in issues:
            if issue.get("assignees"):
                continue
            # `or ""` also covers keys that are present but null, which
            # .get(key, "") would pass through as None.
            title = issue.get("title") or ""
            body = issue.get("body") or ""
            lowered_title = title.lower()
            if any(word in lowered_title for word in skip_words):
                continue
            agent, _reason = suggest_agent(title, body)
            if agent and assign_issue(repo, issue["number"], agent):
                count += 1
    log_delegation(f"Cycle complete: {count} new assignments")
    return count
def show_status():
    """Print each agent's assigned-issue count, ONLINE/OFFLINE flag and up to three issues."""
    preview_limit = 3
    print("\n=== Delegation Status ===")
    for name, info in AGENTS.items():
        assigned = get_my_issues(name)
        if info["active"]:
            status = "ONLINE"
        else:
            status = "OFFLINE"
        print(f" {name}: {len(assigned)} assigned [{status}]")
        for entry in assigned[:preview_limit]:
            repo_short = entry['repo'].split('/')[-1]
            number = entry['issue']['number']
            title = entry['issue']['title'][:60]
            print(f" - {repo_short}#{number}: {title}")
        if len(assigned) > preview_limit:
            print(f" ... +{len(assigned)-3} more")
if __name__ == "__main__":
    # CLI entry point. No argument behaves like "run": one delegation cycle
    # followed by the status report.
    DATA_DIR.mkdir(exist_ok=True, parents=True)
    cmd = sys.argv[1] if len(sys.argv) > 1 else None
    if cmd is None or cmd == "run":
        run_cycle()
        show_status()
    elif cmd == "status":
        show_status()
    elif cmd == "assign" and len(sys.argv) >= 5:
        # Argument order on the command line is <issue_num> <repo> <agent>.
        issue_num, repo, agent = sys.argv[2:5]
        assign_issue(repo, int(issue_num), agent)
    else:
        print("Usage: delegation.py [run|status|assign <issue_num> <repo> <agent>]")

171
fleet/model-fallback.sh Executable file
View File

@@ -0,0 +1,171 @@
#!/usr/bin/env bash
# FLEET-011: Local Model Pipeline and Fallback Chain
# Phase 4: Sovereignty — all inference runs locally, no cloud dependency.
#
# Usage:
# ./model-fallback.sh # Show current model chain status
# ./model-fallback.sh list # List all local models
# ./model-fallback.sh test "Hello" # Test the full fallback chain
# ./model-fallback.sh chat # Interactive chat mode
# ./model-fallback.sh install # Install default model chain
set -euo pipefail
# === CONFIG ===
# Path of the persisted chain definition; load_chain prints it when the file exists.
CHAIN_FILE="$HOME/.local/timmy/fleet-resources/model-chain.json"
# NOTE(review): LOG_DIR is not referenced anywhere else in the visible script — confirm it is still needed.
LOG_DIR="$HOME/.local/timmy/fleet-health"
# Base URL of the local Ollama HTTP API (status queries its /api/tags endpoint).
OLLAMA_URL="http://localhost:11434"
# Default chain (best quality first, fallback to smallest that runs)
# The quoted heredoc delimiter ('EOF') keeps the JSON literal: no variable or
# command expansion happens inside it.
DEFAULT_CHAIN=$(cat << 'EOF'
{
"chain": [
{"name": "hermes4:14b", "provider": "ollama", "max_tokens": 4096, "purpose": "primary"},
{"name": "qwen2.5:7b", "provider": "ollama", "max_tokens": 4096, "purpose": "fallback"},
{"name": "phi3:3.8b", "provider": "ollama", "max_tokens": 2048, "purpose": "emergency"},
{"name": "gemma2:2b", "provider": "ollama", "max_tokens": 2048, "purpose": "minimal"}
]
}
EOF
)
# Print the active chain JSON: the saved CHAIN_FILE when it exists, otherwise
# the built-in DEFAULT_CHAIN.
load_chain() {
  if [[ ! -f "$CHAIN_FILE" ]]; then
    printf '%s\n' "$DEFAULT_CHAIN"
    return
  fi
  cat "$CHAIN_FILE"
}
# Write the chain JSON given as $1 to CHAIN_FILE.
save_chain() {
  # The target directory may not exist on a fresh machine; without this the
  # redirect fails and `set -e` aborts the script.
  mkdir -p "$(dirname "$CHAIN_FILE")"
  # printf instead of echo so content starting with "-" is never taken as an option.
  printf '%s\n' "$1" > "$CHAIN_FILE"
  echo "Model chain saved to $CHAIN_FILE"
}
# Persist DEFAULT_CHAIN to CHAIN_FILE and pull every model in it that
# `ollama list` does not already show.
install_chain() {
  echo "Installing default model chain..."
  # Create the parent directory first; the redirect below fails otherwise.
  mkdir -p "$(dirname "$CHAIN_FILE")"
  echo "$DEFAULT_CHAIN" > "$CHAIN_FILE"

  # Query the installed models once. Piping `ollama list` straight into
  # `grep -q` can make the pipeline fail under pipefail when grep exits early,
  # which would misreport an installed model as missing.
  local installed
  installed="$(ollama list 2>/dev/null || true)"

  # Extract model names and install via Ollama
  echo "$DEFAULT_CHAIN" | python3 -c '
import json, sys
for m in json.load(sys.stdin)["chain"]:
    print(m["name"])
' | while IFS= read -r model; do
    echo " Installing $model..."
    if grep -qF -- "$model" <<< "$installed"; then
      echo " $model already installed"
    else
      # stdin is redirected so the pull cannot consume the remaining model
      # names; a failed pull is reported and the loop moves on.
      ollama pull "$model" </dev/null 2>&1 | tail -1 || echo " Failed to pull $model"
    fi
  done
}
# Show the models Ollama reports, then the active fallback chain as a table.
list_models() {
  echo "=== Local Models (Ollama) ==="
  if ! ollama list 2>/dev/null; then
    echo "Ollama not running or not installed"
  fi
  echo ""
  echo "=== Active Fallback Chain ==="
  load_chain | python3 -c '
import json, sys

row = "{:<25} {:<12} {:>10}"
chain = json.load(sys.stdin)["chain"]
print(row.format("Model", "Purpose", "Max tokens"))
print("-" * 50)
for entry in chain:
    print(row.format(entry["name"], entry["purpose"], entry["max_tokens"]))
'
}
# Report whether the Ollama API answers, how many models it lists, and then
# print the model and chain listing.
status() {
  local tags_url="$OLLAMA_URL/api/tags"
  echo "=== Model Pipeline Status ==="
  # Check Ollama
  if ! curl -s "$tags_url" >/dev/null 2>&1; then
    echo " Ollama: DOWN - no local inference available"
  else
    echo " Ollama: RUNNING at $OLLAMA_URL"
    # Falls back to "?" when the response cannot be parsed.
    model_count=$(curl -s "$tags_url" | python3 -c "import json,sys; print(len(json.load(sys.stdin).get('models',[])))" 2>/dev/null || echo "?")
    echo " Local models: $model_count"
  fi
  echo ""
  echo "=== Fallback Chain ==="
  list_models
}
# Send the prompt given as $1 to each model of the chain in order and stop at
# the first one that answers. Returns non-zero when every model fails.
test_chain() {
  local prompt="$1"
  echo "Testing fallback chain with prompt: \"$prompt\""
  echo ""
  # The prompt and the Ollama URL are passed as argv instead of being spliced
  # into the Python source: interpolating the prompt broke on quotes and let
  # prompt text be executed as code.
  load_chain | python3 -c '
import json
import sys
import urllib.request

prompt, base_url = sys.argv[1], sys.argv[2]
models = json.load(sys.stdin)["chain"]
for idx, m in enumerate(models):
    name = m["name"]
    print(f" Testing {name}...", end=" ", flush=True)
    try:
        body = json.dumps({"model": name, "prompt": prompt, "stream": False}).encode()
        req = urllib.request.Request(base_url + "/api/generate", data=body,
                                     headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            result = json.loads(resp.read())
    except Exception as e:
        print(f"FAILED — {str(e)[:60]}")
        continue
    reply = result.get("response", "")[:80]
    print(f"OK — \"{reply}\"")
    role = "Primary" if idx == 0 else "Fallback"
    print(f" Chain works! {role} model ({name}) is serving.")
    sys.exit(0)
print("All models failed. No local inference available.")
sys.exit(1)
' "$prompt" "$OLLAMA_URL"
}
# Interactive prompt loop against the local model chain. Each prompt is sent
# to the first model; on error the next model in the chain is tried.
chat() {
  echo "=== Beacon Chat Mode ==="
  echo "Type 'quit' to exit. Using local model chain."
  echo ""
  # The chain JSON is passed as an argument rather than piped in: with a pipe
  # on stdin, input() hit EOF right after the JSON was read and the chat
  # exited before the first prompt.
  python3 -c '
import json
import sys
import urllib.request

models = json.loads(sys.argv[1])["chain"]
base_url = sys.argv[2]
while True:
    try:
        prompt = input("> ")
    except EOFError:
        break
    if prompt.lower() in ("quit", "exit"):
        break
    if not prompt.strip():
        continue
    print("Thinking...")
    for m in models:
        name = m["name"]
        body = json.dumps({"model": name, "prompt": prompt, "stream": False}).encode()
        req = urllib.request.Request(base_url + "/api/generate", data=body,
                                     headers={"Content-Type": "application/json"})
        try:
            with urllib.request.urlopen(req, timeout=120) as resp:
                result = json.loads(resp.read())
        except Exception as e:
            print(f"Model error ({name}): {e}")
            print("Trying next model in chain...")
            continue
        print(result.get("response", "").strip())
        break
    else:
        print("All models failed. No local inference available.")
' "$(load_chain)" "$OLLAMA_URL"
}
# Command dispatch; with no argument the script shows the pipeline status.
action="${1:-status}"
case "$action" in
  status)  status ;;
  list)    list_models ;;
  install) install_chain ;;
  chat)    chat ;;
  test)    test_chain "${2:-Hello, are you there?}" ;;
  *)       echo "Usage: $0 [install|list|test|chat|status]" ;;
esac