Some checks failed
Smoke Test / smoke (pull_request) Failing after 23s
Architecture Lint / Linter Tests (pull_request) Successful in 26s
Validate Config / YAML Lint (pull_request) Failing after 15s
Validate Config / JSON Validate (pull_request) Successful in 19s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 1m1s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 1m4s
Validate Config / Cron Syntax Check (pull_request) Successful in 13s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 13s
Validate Config / Playbook Schema Validation (pull_request) Successful in 25s
Architecture Lint / Lint Repository (pull_request) Failing after 22s
PR Checklist / pr-checklist (pull_request) Successful in 5m0s
Add fleet.inventory and fleet.path_contracts to config.yaml:
- Central source of truth for IPs, ports, roles, and remote paths
- Introduce get_config_path(), load_fleet_inventory(), and get_path_contract()
- Update fleet_llama.py, self_healing.py, telemetry.py, agent_dispatch.py, and skill_installer.py to read from config instead of hard-coded dicts/paths
- Document the inventory contract and override mechanism in scripts/README.md

Scripts retain fallback defaults for backwards compatibility.

Closes #433
152 lines
4.9 KiB
Python
152 lines
4.9 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
[OPS] Telemetry Pipeline v2
|
|
Part of the Gemini Sovereign Infrastructure Suite.
|
|
|
|
Operational visibility without cloud dependencies.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
import time
|
|
import argparse
|
|
|
|
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
|
if SCRIPT_DIR not in sys.path:
|
|
sys.path.insert(0, SCRIPT_DIR)
|
|
|
|
from ssh_trust import VerifiedSSHExecutor
|
|
import yaml
|
|
|
|
# --- CONFIGURATION ---
|
|
|
|
def get_config_path():
    """Return the path of the fleet configuration file.

    A non-empty ``TIMMY_CONFIG`` environment variable takes precedence;
    otherwise the path is ``config.yaml`` one directory above this script.
    """
    override = os.environ.get('TIMMY_CONFIG')
    if override:
        return override
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, '..', 'config.yaml')
|
|
|
|
def load_fleet_inventory():
    """Return a ``{host: ip}`` map from config.yaml, or built-in defaults.

    The map is built from the ``ip`` field of every entry under
    ``fleet.inventory`` in the file named by ``get_config_path()``.

    The built-in defaults are returned when the config file does not exist,
    when it has no (or an empty) inventory, or when it cannot be read or
    does not have the expected shape. Only the last case is reported, as a
    warning on stderr, so a broken config is not silently mistaken for an
    absent one. This function never raises for configuration problems.
    """
    defaults = {
        "mac": "10.1.10.77",
        "ezra": "143.198.27.163",
        "allegro": "167.99.126.228",
        "bezalel": "159.203.146.185",
    }
    path = get_config_path()

    try:
        with open(path, 'r', encoding='utf-8') as f:
            cfg = yaml.safe_load(f)
    except FileNotFoundError:
        # No config file is a supported setup: fall back quietly.
        return defaults
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        print(f"[!] Could not read fleet config {path}: {exc}; using defaults",
              file=sys.stderr)
        return defaults

    try:
        inv = cfg.get('fleet', {}).get('inventory', {})
        if inv:
            return {k: v['ip'] for k, v in inv.items()}
    except (AttributeError, KeyError, TypeError) as exc:
        # Empty file (cfg is None), non-mapping sections, or an entry
        # without an 'ip' field all end up here.
        print(f"[!] Malformed fleet inventory in {path}: {exc!r}; using defaults",
              file=sys.stderr)
    return defaults
|
|
# Host name -> IP address map, resolved once when this module is imported.
FLEET = load_fleet_inventory() # dict {host: ip} loaded from config or defaults
|
|
# Telemetry history file; Telemetry.__init__ joins this onto the repo root.
TELEMETRY_FILE = "logs/telemetry.json"
|
|
|
|
class Telemetry:
    """Collect basic host metrics for every machine in FLEET and report them.

    Each ``collect()`` run appends one snapshot to a JSON history file under
    ``<repo root>/logs``; only the most recent ``MAX_ENTRIES`` snapshots are
    kept. ``show_summary()`` prints the latest snapshot as a table.
    """

    # Number of snapshots retained in the history file.
    MAX_ENTRIES = 100

    def __init__(self, executor=None):
        """Set up the executor and make sure the logs directory exists.

        Args:
            executor: Object providing
                ``run_script(ip, cmd, local=..., timeout=...)``. A
                ``VerifiedSSHExecutor`` is created when omitted.
        """
        self.executor = executor or VerifiedSSHExecutor()
        # Find logs relative to repo root (the parent of this script's dir).
        script_dir = os.path.dirname(os.path.abspath(__file__))
        repo_root = os.path.dirname(script_dir)
        self.logs_dir = os.path.join(repo_root, "logs")
        self.telemetry_path = os.path.join(repo_root, TELEMETRY_FILE)

        # exist_ok avoids the check-then-create race when two runs overlap.
        os.makedirs(self.logs_dir, exist_ok=True)

    def log(self, message: str):
        """Print a progress message to stdout."""
        print(f"[*] {message}")

    def get_metrics(self, host: str):
        """Fetch disk, memory and load figures for one host.

        Args:
            host: A key of ``FLEET``.

        Returns:
            A dict with ``disk_usage``, ``mem_usage`` and ``load_avg``
            strings, or ``None`` when the command fails or its output
            cannot be processed.

        Raises:
            KeyError: If ``host`` is not in ``FLEET``.
        """
        ip = FLEET[host]
        is_mac = host == 'mac'
        if is_mac:
            # Mac specific commands.
            # NOTE(review): the second value is the raw output of
            # `sysctl -n vm.page_pageable_internal_count`, which is then
            # formatted below as if it were a percentage - confirm intent.
            cmd = "df -h / | tail -1 | awk '{print $5}' && sysctl -n vm.page_pageable_internal_count && uptime | awk '{print $10}'"
        else:
            # Command to get disk usage, memory usage (%), and load avg
            cmd = "df -h / | tail -1 | awk '{print $5}' && free -m | grep Mem | awk '{print $3/$2 * 100}' && uptime | awk '{print $10}'"

        try:
            res = self.executor.run_script(ip, cmd, local=is_mac, timeout=10)
            if res.returncode != 0:
                self.log(f"Metrics command failed on {host} (exit {res.returncode})")
                return None

            lines = res.stdout.strip().split("\n")
            raw_mem = lines[1] if len(lines) > 1 else ""
            if raw_mem.replace('.', '', 1).isdigit():
                mem_usage = f"{float(raw_mem):.1f}%"
            else:
                mem_usage = "unknown"
            return {
                "disk_usage": lines[0],
                "mem_usage": mem_usage,
                "load_avg": lines[2].rstrip(",") if len(lines) > 2 else "unknown",
            }
        except Exception as exc:
            # Best effort: one unreachable host must not abort the whole
            # collection. The executor's exception types are not known here,
            # so catch broadly, but let KeyboardInterrupt/SystemExit through.
            self.log(f"Metrics unavailable for {host}: {exc}")
            return None

    def _read_history(self):
        """Return the stored snapshot list.

        Raises:
            ValueError: If the file is not valid JSON or is not a JSON list.
        """
        with open(self.telemetry_path, "r") as f:
            history = json.load(f)
        if not isinstance(history, list):
            raise ValueError("telemetry history is not a JSON list")
        return history

    def collect(self):
        """Gather metrics from every host in FLEET and append them to the history file."""
        self.log("Collecting telemetry from fleet...")
        data = {
            "timestamp": time.time(),
            "metrics": {},
        }

        for host in FLEET:
            self.log(f"Fetching metrics from {host}...")
            metrics = self.get_metrics(host)
            if metrics:
                data["metrics"][host] = metrics

        # Append to telemetry file
        history = []
        if os.path.exists(self.telemetry_path):
            try:
                history = self._read_history()
            except ValueError as exc:
                self.log(f"Existing telemetry history unusable ({exc}); starting fresh")

        history.append(data)
        # Keep only the most recent entries
        history = history[-self.MAX_ENTRIES:]

        # Write to a sibling file and rename it into place so an interrupted
        # run cannot leave a truncated history file behind.
        tmp_path = self.telemetry_path + ".tmp"
        with open(tmp_path, "w") as f:
            json.dump(history, f, indent=2)
        os.replace(tmp_path, self.telemetry_path)

        self.log(f"Telemetry saved to {self.telemetry_path}")

    def show_summary(self):
        """Print the most recent snapshot as a HOST/DISK/MEM/LOAD table.

        Prints a short message instead when there is no history, or when the
        history file cannot be parsed or does not have the expected shape.
        """
        if not os.path.exists(self.telemetry_path):
            print("No telemetry data found.")
            return

        try:
            history = self._read_history()
        except ValueError:
            print("Error reading telemetry data.")
            return

        if not history:
            print("No telemetry data found.")
            return

        latest = history[-1]
        stamp = latest.get("timestamp") if isinstance(latest, dict) else None
        metrics = latest.get("metrics") if isinstance(latest, dict) else None
        if not isinstance(stamp, (int, float)) or not isinstance(metrics, dict):
            print("Error reading telemetry data.")
            return

        print(f"\n--- Fleet Telemetry Summary ({time.ctime(stamp)}) ---")
        print(f"{'HOST':<10} {'DISK':<10} {'MEM':<10} {'LOAD':<10}")
        print("-" * 45)
        for host, m in metrics.items():
            row = m if isinstance(m, dict) else {}
            # Missing fields are shown as "unknown" rather than aborting.
            disk = str(row.get("disk_usage", "unknown"))
            mem = str(row.get("mem_usage", "unknown"))
            load = str(row.get("load_avg", "unknown"))
            print(f"{host:<10} {disk:<10} {mem:<10} {load:<10}")
|
|
|
|
def main():
    """Command-line entry point: run ``collect`` or ``summary``."""
    cli = argparse.ArgumentParser(description="Gemini Telemetry")
    cli.add_argument("command", choices=["collect", "summary"], help="Command to run")
    options = cli.parse_args()

    pipeline = Telemetry()
    # argparse has already restricted the command to these two choices.
    handlers = {
        "collect": pipeline.collect,
        "summary": pipeline.show_summary,
    }
    handlers[options.command]()
|
|
|
|
if __name__ == "__main__":
|
|
main()
|