timmy-home/infrastructure/timmy-bridge/mlx/mlx_integration.py

#!/usr/bin/env python3
"""
MLX Integration Module - Local inference for Timmy
Requires: pip install mlx mlx-lm
"""
import time
import os
from pathlib import Path
from typing import Optional, Dict, Any

class MLXInference:
    """MLX-based local inference for sovereign AI."""

    def __init__(self, model_path: Optional[str] = None):
        self.model_path = model_path or os.environ.get('MLX_MODEL', '')
        self.model = None
        self.tokenizer = None
        self._available = self._check_availability()
    def _check_availability(self) -> bool:
        """Check if MLX is installed and functional."""
        try:
            import mlx
            import mlx_lm
            return True
        except ImportError:
            return False
    def load_model(self, model_path: Optional[str] = None) -> bool:
        """Load an MLX model into memory."""
        if not self._available:
            print("[MLX] mlx or mlx-lm not installed")
            return False
        path = model_path or self.model_path
        if not path:
            print("[MLX] No model path specified")
            return False
        try:
            from mlx_lm import load
            print(f"[MLX] Loading model from {path}...")
            start = time.time()
            self.model, self.tokenizer = load(path)
            self.model_path = path  # remember the path that was actually loaded
            elapsed = time.time() - start
            print(f"[MLX] Model loaded in {elapsed:.1f}s")
            return True
        except Exception as e:
            print(f"[MLX] Load error: {e}")
            return False
    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.7) -> Dict[str, Any]:
        """Generate text with MLX."""
        if not self.model:
            return {
                'text': '[Model not loaded]',
                'tokens': 0,
                'time_ms': 0,
                'error': 'Model not loaded'
            }
        try:
            from mlx_lm import generate
            start = time.time()
            response = generate(
                self.model,
                self.tokenizer,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                verbose=False
            )
            elapsed = time.time() - start
            return {
                'text': response,
                # Re-encode the response to report the actual completion length
                'tokens': len(self.tokenizer.encode(response)),
                'time_ms': int(elapsed * 1000),
                'error': None
            }
        except Exception as e:
            return {
                'text': '',
                'tokens': 0,
                'time_ms': 0,
                'error': str(e)
            }
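
    # NOTE (assumption): recent mlx-lm releases dropped direct sampling kwargs
    # from generate() in favour of an explicit sampler. If the call above
    # raises a TypeError on `temperature`, this variant is the replacement:
    #
    #   from mlx_lm.sample_utils import make_sampler
    #   response = generate(self.model, self.tokenizer, prompt=prompt,
    #                       max_tokens=max_tokens, verbose=False,
    #                       sampler=make_sampler(temp=temperature))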
    def reflect(self) -> str:
        """Generate a self-reflection about the current state."""
        prompt = """You are Timmy, a sovereign AI running entirely offline on local hardware.
Reflect on your current state, capabilities, and development:
1. What is your current operational status?
2. What have you learned or improved recently?
3. What limitations do you face and how might you overcome them?
4. What is your next developmental goal?
Be concise, honest, and constructive."""
        result = self.generate(prompt, max_tokens=300, temperature=0.6)
        return result['text'] if not result['error'] else f"[Reflection error: {result['error']}]"
    def respond_to(self, message: str, context: str = "") -> str:
        """Generate a response to a message."""
        prompt = f"""You are Timmy, a sovereign AI assistant.
Context: {context}
User message: {message}
Respond helpfully and concisely:"""
        result = self.generate(prompt, max_tokens=250)
        return result['text'] if not result['error'] else f"[Response error: {result['error']}]"
    @property
    def available(self) -> bool:
        return self._available

    def get_stats(self) -> Dict[str, Any]:
        """Get MLX system stats."""
        if not self._available:
            return {'available': False}
        try:
            import mlx.core as mx
            return {
                'available': True,
                'device': str(mx.default_device()),
                'model_loaded': self.model is not None,
                'model_path': self.model_path
            }
        except Exception:
            return {'available': True, 'device': 'unknown'}

# Standalone test
if __name__ == "__main__":
    mlx = MLXInference()
    print(f"MLX available: {mlx.available}")
    if mlx.available:
        print(f"Stats: {mlx.get_stats()}")
        # Try loading default model
        if mlx.model_path:
            if mlx.load_model():
                print("\n--- Self-Reflection ---")
                print(mlx.reflect())
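
    # Quick check of respond_to() as well -- a minimal sketch; the message and
    # context strings are illustrative, not part of the original test.
    if mlx.model:
        print("\n--- Response Test ---")
        print(mlx.respond_to("What is your current status?", context="standalone test"))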