#!/usr/bin/env python3
"""
Timmy plays Morrowind — Screen capture + Input automation framework.
Uses macOS Quartz for screenshots, Vision for OCR, CGEvent for input.
"""

import time
import subprocess
import json

import Quartz
import CoreFoundation
from Foundation import NSURL
from Quartz import CIImage
import Vision
from pynput.keyboard import Key, Controller as KeyController
from pynput.mouse import Button, Controller as MouseController

keyboard = KeyController()
mouse = MouseController()

SCREENSHOT_PATH = "/tmp/morrowind_screen.png"

# Rough mouse travel (in pixels) per degree of camera rotation at the
# game's default sensitivity. Shared by all look_* helpers.
PIXELS_PER_DEGREE = 5


def bring_to_front():
    """Bring OpenMW window to front.

    Best-effort: the osascript exit status is ignored, so this does not
    raise if the "openmw" process is not running.
    """
    subprocess.run(
        [
            "osascript",
            "-e",
            'tell application "System Events" to set frontmost of process "openmw" to true',
        ],
        capture_output=True,
    )
    # Give the window manager a moment to finish the focus switch.
    time.sleep(0.5)


def screenshot():
    """Capture the main display to SCREENSHOT_PATH as a PNG.

    Returns:
        A ``(path, width, height)`` tuple on success (dimensions in image
        pixels), or ``None`` if the capture or the file write failed.
    """
    image = Quartz.CGDisplayCreateImage(Quartz.CGMainDisplayID())
    if not image:
        return None

    # Path style 0 is the POSIX path style; False = path is not a directory.
    url = CoreFoundation.CFURLCreateWithFileSystemPath(
        None, SCREENSHOT_PATH, 0, False
    )
    dest = Quartz.CGImageDestinationCreateWithURL(url, 'public.png', 1, None)
    if not dest:
        # Destination could not be created (e.g. unwritable location).
        return None

    Quartz.CGImageDestinationAddImage(dest, image, None)
    # Finalize reports whether the image was actually written; without this
    # check a failed write would hand callers a stale or missing file.
    if not Quartz.CGImageDestinationFinalize(dest):
        return None

    width = Quartz.CGImageGetWidth(image)
    height = Quartz.CGImageGetHeight(image)
    return SCREENSHOT_PATH, width, height


def ocr(path=SCREENSHOT_PATH):
    """OCR the screenshot and return all detected text.

    Args:
        path: Image file to run text recognition on.

    Returns:
        A list of dicts with keys ``text``, ``confidence``, ``x``, ``y``,
        ``w`` and ``h``. The box values are the observation's normalized
        bounding box. Returns an empty list if the image cannot be loaded
        or the recognition request fails.
    """
    url = NSURL.fileURLWithPath_(path)
    ci = CIImage.imageWithContentsOfURL_(url)
    if not ci:
        return []

    req = Vision.VNRecognizeTextRequest.alloc().init()
    # Accurate is level 0; the previous literal 1 selected the *fast* level
    # despite being commented as "accurate".
    req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)

    handler = Vision.VNImageRequestHandler.alloc().initWithCIImage_options_(ci, None)
    success, _error = handler.performRequests_error_([req], None)
    if not success:
        return []

    results = []
    # results() may be None when nothing was recognized.
    for observation in req.results() or []:
        bbox = observation.boundingBox()  # normalized coordinates
        results.append({
            "text": observation.text(),
            "confidence": observation.confidence(),
            "x": bbox.origin.x,
            "y": bbox.origin.y,
            "w": bbox.size.width,
            "h": bbox.size.height,
        })
    return results


def press_key(key, duration=0.1):
    """Press and release a key.

    Args:
        key: A character or a ``pynput.keyboard.Key`` member.
        duration: Seconds to hold the key down before releasing it.
    """
    keyboard.press(key)
    try:
        time.sleep(duration)
    finally:
        # Always release, even if the sleep is interrupted (e.g. Ctrl-C),
        # so the key is never left stuck down in the game.
        keyboard.release(key)


def type_text(text):
    """Type a string."""
    keyboard.type(text)


def click(x, y, button='left'):
    """Click at screen coordinates.

    Args:
        x: Horizontal screen position.
        y: Vertical screen position.
        button: ``'left'`` for the left button; any other value clicks the
            right button.
    """
    mouse.position = (x, y)
    # Short pause so the pointer move is registered before the click.
    time.sleep(0.05)
    btn = Button.left if button == 'left' else Button.right
    mouse.click(btn)


def move_mouse(dx, dy):
    """Move mouse by delta (for camera look)."""
    cx, cy = mouse.position
    mouse.position = (cx + dx, cy + dy)


def walk_forward(duration=1.0):
    """Hold W to walk forward."""
    press_key('w', duration)


def walk_backward(duration=1.0):
    """Hold S to walk backward."""
    press_key('s', duration)


def strafe_left(duration=0.5):
    """Hold A to strafe left."""
    press_key('a', duration)


def strafe_right(duration=0.5):
    """Hold D to strafe right."""
    press_key('d', duration)


def jump():
    """Jump."""
    press_key('e')  # OpenMW default jump


def attack():
    """Left click attack."""
    mouse.click(Button.left)


def use():
    """Activate / use."""
    press_key(' ')  # spacebar = activate in OpenMW


def open_menu():
    """Press Escape to open the menu."""
    press_key(Key.esc)


def open_journal():
    """Press J to open the journal."""
    press_key('j')


def open_inventory():
    """Press I to open the inventory."""
    press_key('i')


def look_around(yaw_degrees=90):
    """Rotate camera by moving mouse."""
    # Rough: ~5 pixels per degree at default sensitivity
    move_mouse(int(yaw_degrees * PIXELS_PER_DEGREE), 0)


def look_up(degrees=30):
    """Tilt the camera up by moving the mouse toward the top of the screen."""
    move_mouse(0, int(-degrees * PIXELS_PER_DEGREE))


def look_down(degrees=30):
    """Tilt the camera down by moving the mouse toward the bottom of the screen."""
    move_mouse(0, int(degrees * PIXELS_PER_DEGREE))


def see():
    """Take a screenshot, OCR it, return structured perception.

    Returns:
        On success, a dict with ``screenshot`` (file path), ``resolution``
        (``"WxH"`` string), ``text`` (list of OCR result dicts) and
        ``text_summary`` (the first 20 texts joined with ``" | "``).
        If the screenshot fails, ``{"error": "screenshot failed"}``.
    """
    bring_to_front()
    time.sleep(0.3)

    result = screenshot()
    if not result:
        return {"error": "screenshot failed"}

    path, w, h = result
    texts = ocr(path)
    return {
        "screenshot": path,
        "resolution": f"{w}x{h}",
        "text": texts,
        "text_summary": " | ".join(t["text"] for t in texts[:20]),
    }


if __name__ == "__main__":
    print("=== Timmy's Morrowind Eyes ===")
    bring_to_front()
    time.sleep(1)
    perception = see()
    if "error" in perception:
        # see() reports failure with an error dict that has no
        # 'resolution'/'text' keys; exit cleanly instead of a KeyError.
        raise SystemExit(f"Perception failed: {perception['error']}")
    print(f"Resolution: {perception['resolution']}")
    print(f"Text found: {len(perception['text'])} elements")
    print(f"Summary: {perception['text_summary'][:500]}")