154 lines
4.8 KiB
Python
154 lines
4.8 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
MLX Integration Module - Local inference for Timmy
|
||
|
|
Requires: pip install mlx mlx-lm
|
||
|
|
"""
|
||
|
|
|
||
|
|
import time
|
||
|
|
import os
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import Optional, Dict, Any
|
||
|
|
|
||
|
|
class MLXInference:
    """MLX-based local inference for sovereign AI.

    Thin wrapper around the ``mlx`` / ``mlx-lm`` packages (Apple-silicon
    local LLM runtime). All mlx imports are deferred to call time so this
    module can be imported on machines without MLX installed; every public
    method degrades gracefully (returns False / an error dict) instead of
    raising when MLX or the model is unavailable.
    """

    def __init__(self, model_path: Optional[str] = None):
        """Create the inference wrapper without loading any model.

        Args:
            model_path: Path (or repo id) of the model to load later.
                Falls back to the ``MLX_MODEL`` environment variable,
                then to the empty string (meaning "not configured").
        """
        self.model_path = model_path or os.environ.get('MLX_MODEL', '')
        self.model = None       # populated by load_model()
        self.tokenizer = None   # populated by load_model()
        self._available = self._check_availability()

    def _check_availability(self) -> bool:
        """Return True if both ``mlx`` and ``mlx_lm`` can be imported."""
        try:
            import mlx      # noqa: F401 -- availability probe only
            import mlx_lm   # noqa: F401 -- availability probe only
            return True
        except ImportError:
            return False

    def load_model(self, model_path: Optional[str] = None) -> bool:
        """Load an MLX model (and its tokenizer) into memory.

        Args:
            model_path: Optional override; defaults to the path chosen
                at construction time.

        Returns:
            True on success, False on any failure (missing MLX, no path
            configured, or a load error -- each is reported via print).
        """
        if not self._available:
            print("[MLX] mlx or mlx-lm not installed")
            return False

        path = model_path or self.model_path
        if not path:
            print("[MLX] No model path specified")
            return False

        try:
            from mlx_lm import load
            print(f"[MLX] Loading model from {path}...")
            start = time.time()
            self.model, self.tokenizer = load(path)
            elapsed = time.time() - start
            print(f"[MLX] Model loaded in {elapsed:.1f}s")
            return True
        except Exception as e:
            # Load failures (bad path, corrupt weights, OOM) are reported
            # rather than raised so callers can fall back.
            print(f"[MLX] Load error: {e}")
            return False

    def generate(self, prompt: str, max_tokens: int = 200,
                 temperature: float = 0.7) -> Dict[str, Any]:
        """Generate text with the loaded MLX model.

        Args:
            prompt: Prompt text passed straight to ``mlx_lm.generate``.
            max_tokens: Generation budget.
            temperature: Sampling temperature.

        Returns:
            Dict with keys ``text``, ``tokens``, ``time_ms`` and
            ``error``; ``error`` is None on success, otherwise a message
            and the other fields are zeroed/empty.
        """
        if not self.model:
            return {
                'text': '[Model not loaded]',
                'tokens': 0,
                'time_ms': 0,
                'error': 'Model not loaded'
            }

        try:
            from mlx_lm import generate

            start = time.time()
            # NOTE(review): the keyword for sampling temperature varies
            # across mlx_lm versions (`temperature=` vs `temp=` vs a
            # `sampler=` object) -- confirm against the pinned version.
            response = generate(
                self.model,
                self.tokenizer,
                prompt=prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                verbose=False
            )
            elapsed = time.time() - start

            # Report the actual generated token count when the tokenizer
            # supports it; fall back to the requested budget (the old,
            # inaccurate behavior) if encoding fails.
            try:
                token_count = len(self.tokenizer.encode(response))
            except Exception:
                token_count = max_tokens

            return {
                'text': response,
                'tokens': token_count,
                'time_ms': int(elapsed * 1000),
                'error': None
            }
        except Exception as e:
            return {
                'text': '',
                'tokens': 0,
                'time_ms': 0,
                'error': str(e)
            }

    def reflect(self) -> str:
        """Generate a self-reflection about the assistant's current state.

        Returns the generated text, or a bracketed error string if
        generation failed (e.g. no model loaded).
        """
        prompt = """You are Timmy, a sovereign AI running entirely offline on local hardware.
Reflect on your current state, capabilities, and development:

1. What is your current operational status?
2. What have you learned or improved recently?
3. What limitations do you face and how might you overcome them?
4. What is your next developmental goal?

Be concise, honest, and constructive."""

        result = self.generate(prompt, max_tokens=300, temperature=0.6)
        return result['text'] if not result['error'] else f"[Reflection error: {result['error']}]"

    def respond_to(self, message: str, context: str = "") -> str:
        """Generate a response to a user message.

        Args:
            message: The user's message.
            context: Optional context string interpolated into the prompt.

        Returns the generated text, or a bracketed error string on failure.
        """
        prompt = f"""You are Timmy, a sovereign AI assistant.

Context: {context}

User message: {message}

Respond helpfully and concisely:"""

        result = self.generate(prompt, max_tokens=250)
        return result['text'] if not result['error'] else f"[Response error: {result['error']}]"

    @property
    def available(self) -> bool:
        """True when mlx and mlx_lm were importable at construction time."""
        return self._available

    def get_stats(self) -> Dict[str, Any]:
        """Get MLX system stats.

        Returns ``{'available': False}`` when MLX is not installed;
        otherwise the device name, whether a model is loaded, and the
        configured model path.
        """
        if not self._available:
            return {'available': False}

        try:
            import mlx.core as mx
            return {
                'available': True,
                'device': str(mx.default_device()),
                'model_loaded': self.model is not None,
                'model_path': self.model_path
            }
        except Exception:
            # Narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit.
            return {'available': True, 'device': 'unknown'}
|
||
|
|
|
||
|
|
# Standalone smoke test: run this module directly to probe MLX availability,
# print device stats, and (if a model path is configured) load the model and
# emit a self-reflection.
if __name__ == "__main__":
    # Renamed from `mlx` -- the old name shadowed the `mlx` package that the
    # module imports inside MLXInference, which is needlessly confusing.
    engine = MLXInference()
    print(f"MLX available: {engine.available}")

    if engine.available:
        print(f"Stats: {engine.get_stats()}")

        # Try loading default model
        if engine.model_path:
            if engine.load_model():
                print("\n--- Self-Reflection ---")
                print(engine.reflect())