timmy-config/scripts/telemetry.py

#!/usr/bin/env python3
"""
[OPS] Telemetry Pipeline v2
Part of the Gemini Sovereign Infrastructure Suite.

Operational visibility without cloud dependencies.
"""

import os
import sys
import json
import time
import argparse

# Put this script's own directory at the front of sys.path so that modules
# living next to it resolve on import (presumably for ssh_trust, which is
# imported just below — confirm).
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
if SCRIPT_DIR not in sys.path:
    sys.path.insert(0, SCRIPT_DIR)

from ssh_trust import VerifiedSSHExecutor
import yaml

# --- CONFIGURATION ---

def get_config_path():
    """Return the path of the config file to load.

    A non-empty ``TIMMY_CONFIG`` environment variable takes precedence;
    otherwise the path is ``config.yaml`` one directory above this file.
    """
    override = os.environ.get('TIMMY_CONFIG')
    if override:
        return override
    here = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(here, '..', 'config.yaml')

def load_fleet_inventory():
    """Return a {host: ip} map from config.yaml, or the built-in defaults.

    The map is built from the ``fleet.inventory`` section of the config file
    returned by ``get_config_path()``; every inventory entry must provide an
    ``ip`` key.  Loading is best-effort and never raises: when the file is
    missing, has no inventory, or cannot be parsed, the hard-coded default
    fleet is returned instead.  Any failure other than a missing file is
    reported on stderr so a broken config does not go unnoticed.
    """
    defaults = {
        "mac": "10.1.10.77",
        "ezra": "143.198.27.163",
        "allegro": "167.99.126.228",
        "bezalel": "159.203.146.185",
    }
    config_path = get_config_path()
    try:
        with open(config_path, 'r') as f:
            cfg = yaml.safe_load(f)
        # An empty file parses to None and a bare "fleet:" key yields None as
        # well; treat both the same as "no inventory configured".
        fleet = (cfg or {}).get('fleet') or {}
        inventory = fleet.get('inventory') or {}
        if inventory:
            return {host: entry['ip'] for host, entry in inventory.items()}
    except FileNotFoundError:
        # Running without a config file is the normal "use defaults" case.
        pass
    except Exception as exc:
        # Keep the original never-fail contract, but tell the operator that
        # the configured inventory was ignored and why.
        print(
            f"[!] Ignoring fleet inventory in {config_path}: {exc!r}; using defaults",
            file=sys.stderr,
        )
    return defaults
# Fleet inventory, resolved once at import time: maps host name -> IP address,
# read from the config file when possible, otherwise the built-in defaults.
FLEET = load_fleet_inventory()  # dict {host: ip} loaded from config or defaults
# Location of the telemetry history file, relative to the repo root
# (Telemetry.__init__ joins it onto the repo root directory).
TELEMETRY_FILE = "logs/telemetry.json"

class Telemetry:
    """Collect, store and display basic health metrics for the fleet.

    For every host in ``FLEET`` a small shell pipeline is run through the
    executor to read root-disk usage, memory usage and load average.  Each
    collection run is appended as one snapshot to a JSON history file under
    ``<repo root>/logs`` (capped at ``MAX_HISTORY`` snapshots), and
    ``show_summary`` prints the newest snapshot as a table.
    """

    # Maximum number of snapshots kept in the history file.
    MAX_HISTORY = 100

    # Each command below is expected to print one line; get_metrics chains
    # three of them with "&&" and parses the output by line position.
    _DISK_CMD = "df -h / | tail -1 | awk '{print $5}'"
    _LINUX_MEM_CMD = "free -m | grep Mem | awk '{print $3/$2 * 100}'"
    # NOTE(review): this sysctl appears to report a page count rather than a
    # percentage, so the "%" suffix added when parsing may be misleading for
    # the mac host — confirm and replace with a real percentage if so.
    _MAC_MEM_CMD = "sysctl -n vm.page_pageable_internal_count"
    # Anchor on the "load average(s):" label instead of a fixed column: the
    # number of fields before it changes with how long the host has been up.
    _LOAD_CMD = "uptime | awk -F'load averages?: ' '{print $2}' | awk '{print $1}'"

    def __init__(self, executor=None):
        """Set up the executor and make sure the logs directory exists.

        Args:
            executor: object exposing ``run_script(ip, cmd, local=..., timeout=...)``;
                a ``VerifiedSSHExecutor`` is created when omitted.
        """
        self.executor = executor if executor is not None else VerifiedSSHExecutor()
        # Find logs relative to repo root (the parent of this script's directory)
        script_dir = os.path.dirname(os.path.abspath(__file__))
        repo_root = os.path.dirname(script_dir)
        self.logs_dir = os.path.join(repo_root, "logs")
        self.telemetry_path = os.path.join(repo_root, TELEMETRY_FILE)

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(self.logs_dir, exist_ok=True)

    def log(self, message: str):
        """Print a progress message prefixed with ``[*]``."""
        print(f"[*] {message}")

    @staticmethod
    def _parse_metrics(stdout):
        """Turn the three-line command output into a metrics dict.

        Line 1 is disk usage, line 2 the memory figure (formatted with one
        decimal and a ``%`` suffix when numeric), line 3 the load average.
        Anything missing, empty or non-numeric becomes ``"unknown"``.
        """
        lines = [line.strip() for line in (stdout or "").strip().split("\n")]

        disk = lines[0] or "unknown"

        mem = "unknown"
        if len(lines) > 1 and lines[1].replace('.', '', 1).isdigit():
            mem = f"{float(lines[1]):.1f}%"

        load = "unknown"
        if len(lines) > 2:
            # Linux separates the load averages with commas; drop the trailing one.
            load = lines[2].rstrip(",") or "unknown"

        return {"disk_usage": disk, "mem_usage": mem, "load_avg": load}

    def get_metrics(self, host: str):
        """Fetch disk, memory and load metrics for one host.

        Args:
            host: a key of ``FLEET``.  The host named ``'mac'`` is run locally
                with a different memory command.

        Returns:
            A dict with ``disk_usage``, ``mem_usage`` and ``load_avg`` strings,
            or ``None`` when the command failed (the failure is logged).

        Raises:
            KeyError: if ``host`` is not in ``FLEET``.
        """
        ip = FLEET[host]
        is_mac = host == 'mac'
        mem_cmd = self._MAC_MEM_CMD if is_mac else self._LINUX_MEM_CMD
        cmd = " && ".join((self._DISK_CMD, mem_cmd, self._LOAD_CMD))

        try:
            res = self.executor.run_script(ip, cmd, local=is_mac, timeout=10)
            if res.returncode == 0:
                return self._parse_metrics(res.stdout)
            self.log(f"{host}: metrics command exited with status {res.returncode}")
        except Exception as exc:
            # One unreachable host must not abort the whole collection run,
            # but the reason is worth showing to the operator.
            self.log(f"{host}: failed to fetch metrics: {exc}")
        return None

    def _load_history(self):
        """Return the stored snapshot list.

        Returns ``None`` when the history file is missing, unreadable, not
        valid JSON, or does not contain a JSON list.
        """
        try:
            with open(self.telemetry_path, "r") as f:
                history = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and decoding errors.
            return None
        return history if isinstance(history, list) else None

    def collect(self):
        """Gather metrics from every host in ``FLEET`` and append a snapshot.

        Hosts whose metrics could not be fetched are left out of the snapshot.
        The history file is rewritten with at most ``MAX_HISTORY`` snapshots;
        an unreadable existing file is replaced by a fresh history.
        """
        self.log("Collecting telemetry from fleet...")
        snapshot = {
            "timestamp": time.time(),
            "metrics": {}
        }

        for host in FLEET:
            self.log(f"Fetching metrics from {host}...")
            metrics = self.get_metrics(host)
            if metrics:
                snapshot["metrics"][host] = metrics

        # Append to telemetry file
        history = self._load_history()
        if history is None:
            if os.path.exists(self.telemetry_path):
                self.log("Existing telemetry file is unreadable; starting a new history.")
            history = []

        history.append(snapshot)
        # Keep only the most recent entries
        history = history[-self.MAX_HISTORY:]

        with open(self.telemetry_path, "w") as f:
            json.dump(history, f, indent=2)

        self.log(f"Telemetry saved to {self.telemetry_path}")

    def show_summary(self):
        """Print the most recent snapshot as a HOST / DISK / MEM / LOAD table.

        Prints a short message instead when there is no history file, the
        history is empty, or its content cannot be interpreted.
        """
        if not os.path.exists(self.telemetry_path):
            print("No telemetry data found.")
            return

        history = self._load_history()
        if history is None:
            print("Error reading telemetry data.")
            return

        if not history:
            print("No telemetry data found.")
            return

        latest = history[-1]
        if not isinstance(latest, dict):
            print("Error reading telemetry data.")
            return

        # time.ctime(None) would silently show the current time, so only
        # format timestamps that are real numbers.
        stamp = latest.get("timestamp")
        when = "unknown time"
        if isinstance(stamp, (int, float)):
            try:
                when = time.ctime(stamp)
            except (OverflowError, OSError, ValueError):
                pass

        metrics = latest.get("metrics")
        if not isinstance(metrics, dict):
            metrics = {}

        print(f"\n--- Fleet Telemetry Summary ({when}) ---")
        print(f"{'HOST':<10} {'DISK':<10} {'MEM':<10} {'LOAD':<10}")
        print("-" * 45)
        for host, m in metrics.items():
            if not isinstance(m, dict):
                m = {}
            disk, mem, load = (
                str(m.get(key, "unknown"))
                for key in ("disk_usage", "mem_usage", "load_avg")
            )
            print(f"{host:<10} {disk:<10} {mem:<10} {load:<10}")

def main():
    """Command-line entry point: run the ``collect`` or ``summary`` command."""
    cli = argparse.ArgumentParser(description="Gemini Telemetry")
    cli.add_argument("command", choices=["collect", "summary"], help="Command to run")
    opts = cli.parse_args()

    telemetry = Telemetry()
    # argparse has already restricted the command to these two choices.
    actions = {
        "collect": telemetry.collect,
        "summary": telemetry.show_summary,
    }
    actions[opts.command]()

if __name__ == "__main__":
    main()