1
0

feat: code quality audit + autoresearch integration + infra hardening (#150)

This commit is contained in:
Alexander Whitestone
2026-03-08 12:50:44 -04:00
committed by GitHub
parent fd0ede0d51
commit ae3bb1cc21
186 changed files with 5129 additions and 3289 deletions

View File

@@ -21,11 +21,11 @@ logger = logging.getLogger(__name__)
class DistributedWorker:
"""Continuous task processor for the distributed brain.
Runs on every device, claims tasks matching its capabilities,
executes them immediately, stores results.
"""
def __init__(self, brain_client: Optional[BrainClient] = None):
self.brain = brain_client or BrainClient()
self.node_id = f"{socket.gethostname()}-{os.getpid()}"
@@ -33,30 +33,30 @@ class DistributedWorker:
self.running = False
self._handlers: Dict[str, Callable] = {}
self._register_default_handlers()
def _detect_capabilities(self) -> List[str]:
"""Detect what this node can do."""
caps = ["general", "shell", "file_ops", "git"]
# Check for GPU
if self._has_gpu():
caps.append("gpu")
caps.append("creative")
caps.append("image_gen")
caps.append("video_gen")
# Check for internet
if self._has_internet():
caps.append("web")
caps.append("research")
# Check memory
mem_gb = self._get_memory_gb()
if mem_gb > 16:
caps.append("large_model")
if mem_gb > 32:
caps.append("huge_model")
# Check for specific tools
if self._has_command("ollama"):
caps.append("ollama")
@@ -64,17 +64,15 @@ class DistributedWorker:
caps.append("docker")
if self._has_command("cargo"):
caps.append("rust")
logger.info(f"Worker capabilities: {caps}")
return caps
def _has_gpu(self) -> bool:
"""Check for NVIDIA or AMD GPU."""
try:
# Check for nvidia-smi
result = subprocess.run(
["nvidia-smi"], capture_output=True, timeout=5
)
result = subprocess.run(["nvidia-smi"], capture_output=True, timeout=5)
if result.returncode == 0:
return True
except (OSError, subprocess.SubprocessError):
@@ -83,13 +81,15 @@ class DistributedWorker:
# Check for ROCm
if os.path.exists("/opt/rocm"):
return True
# Check for Apple Silicon Metal
if os.uname().sysname == "Darwin":
try:
result = subprocess.run(
["system_profiler", "SPDisplaysDataType"],
capture_output=True, text=True, timeout=5
capture_output=True,
text=True,
timeout=5,
)
if "Metal" in result.stdout:
return True
@@ -102,8 +102,7 @@ class DistributedWorker:
"""Check if we have internet connectivity."""
try:
result = subprocess.run(
["curl", "-s", "--max-time", "3", "https://1.1.1.1"],
capture_output=True, timeout=5
["curl", "-s", "--max-time", "3", "https://1.1.1.1"], capture_output=True, timeout=5
)
return result.returncode == 0
except (OSError, subprocess.SubprocessError):
@@ -114,8 +113,7 @@ class DistributedWorker:
try:
if os.uname().sysname == "Darwin":
result = subprocess.run(
["sysctl", "-n", "hw.memsize"],
capture_output=True, text=True
["sysctl", "-n", "hw.memsize"], capture_output=True, text=True
)
bytes_mem = int(result.stdout.strip())
return bytes_mem / (1024**3)
@@ -128,13 +126,11 @@ class DistributedWorker:
except (OSError, ValueError):
pass
return 8.0 # Assume 8GB if we can't detect
def _has_command(self, cmd: str) -> bool:
    """Return True if *cmd* resolves on this node's PATH.

    Probes with ``which`` in a subprocess (5s timeout). Any OS error,
    timeout, or subprocess failure is treated as "not available".

    Args:
        cmd: Command name to look up (e.g. "ollama", "docker").

    Returns:
        True when ``which`` exits 0, False otherwise.
    """
    try:
        # The original contained this call twice (leftover pre-refactor
        # line from a merged diff); probing once is sufficient.
        result = subprocess.run(["which", cmd], capture_output=True, timeout=5)
        return result.returncode == 0
    except (OSError, subprocess.SubprocessError):
        # `which` missing or timed out — assume the command is unavailable.
        return False
@@ -148,10 +144,10 @@ class DistributedWorker:
"research": self._handle_research,
"general": self._handle_general,
}
def register_handler(self, task_type: str, handler: Callable[[str], Any]):
"""Register a custom task handler.
Args:
task_type: Type of task this handler handles
handler: Async function that takes task content and returns result
@@ -159,11 +155,11 @@ class DistributedWorker:
self._handlers[task_type] = handler
if task_type not in self.capabilities:
self.capabilities.append(task_type)
# ──────────────────────────────────────────────────────────────────────────
# Task Handlers
# ──────────────────────────────────────────────────────────────────────────
async def _handle_shell(self, command: str) -> str:
"""Execute shell command via ZeroClaw or direct subprocess."""
# Try ZeroClaw first if available
@@ -171,156 +167,153 @@ class DistributedWorker:
proc = await asyncio.create_subprocess_shell(
f"zeroclaw exec --json '{command}'",
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
stderr=asyncio.subprocess.PIPE,
)
stdout, stderr = await proc.communicate()
# Store result in brain
await self.brain.remember(
content=f"Shell: {command}\nOutput: {stdout.decode()}",
tags=["shell", "result"],
source=self.node_id,
metadata={"command": command, "exit_code": proc.returncode}
metadata={"command": command, "exit_code": proc.returncode},
)
if proc.returncode != 0:
raise Exception(f"Command failed: {stderr.decode()}")
return stdout.decode()
# Fallback to direct subprocess (less safe)
proc = await asyncio.create_subprocess_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
stdout, stderr = await proc.communicate()
if proc.returncode != 0:
raise Exception(f"Command failed: {stderr.decode()}")
return stdout.decode()
async def _handle_creative(self, prompt: str) -> str:
    """Generate creative media (placeholder — requires a GPU node).

    Args:
        prompt: Free-text description of the media to generate.

    Returns:
        Placeholder output string for the prompt.

    Raises:
        Exception: if this node does not advertise the "gpu" capability.
    """
    if "gpu" not in self.capabilities:
        raise Exception("GPU not available on this node")
    # This would call creative tools (Stable Diffusion, etc.).
    # For now, placeholder.
    logger.info(f"Creative task: {prompt[:50]}...")
    result = f"Creative output for: {prompt}"
    # Persist the output in the shared brain so other nodes can find it.
    # NOTE: the original had the `metadata=` keyword twice (diff artifact,
    # a SyntaxError); it is passed exactly once here.
    await self.brain.remember(
        content=result,
        tags=["creative", "generated"],
        source=self.node_id,
        metadata={"prompt": prompt},
    )
    return result
async def _handle_code(self, description: str) -> str:
    """Handle a code generation/modification task.

    Placeholder: a real implementation would delegate to an LLM. For now
    it logs the request and echoes a canned response.
    """
    preview = description[:50]
    logger.info(f"Code task: {preview}...")
    return f"Code generated for: {description}"
async def _handle_research(self, query: str) -> str:
    """Handle a web-research task (placeholder implementation).

    Raises:
        Exception: when this node lacks the "web" capability.
    """
    if "web" not in self.capabilities:
        raise Exception("Internet not available on this node")
    # A real implementation would drive browser automation or a search API.
    snippet = query[:50]
    logger.info(f"Research task: {snippet}...")
    return f"Research results for: {query}"
async def _handle_general(self, prompt: str) -> str:
    """Run a general LLM task against the local Ollama server.

    POSTs *prompt* to Ollama's /api/generate endpoint via a curl
    subprocess, stores the prompt/result pair in the brain, and returns
    the model's response text.

    Args:
        prompt: Text prompt for the model.

    Returns:
        The "response" field of Ollama's JSON reply, or "No response".

    Raises:
        Exception: if Ollama is not a capability of this node, or if the
            request / JSON decode / brain write fails ("LLM failed: ...").
    """
    if "ollama" not in self.capabilities:
        raise Exception("Ollama not available on this node")
    try:
        # NOTE: the original carried BOTH the pre- and post-reformat arg
        # lists (diff artifact, duplicated arguments); one clean call here.
        payload = json.dumps(
            {"model": "llama3.1:8b-instruct", "prompt": prompt, "stream": False}
        )
        proc = await asyncio.create_subprocess_exec(
            "curl",
            "-s",
            "http://localhost:11434/api/generate",
            "-d",
            payload,
            stdout=asyncio.subprocess.PIPE,
        )
        stdout, _ = await proc.communicate()
        response = json.loads(stdout.decode())
        result = response.get("response", "No response")
        # Store the exchange so other nodes can reuse the result.
        await self.brain.remember(
            content=f"Task: {prompt}\nResult: {result}",
            tags=["llm", "result"],
            source=self.node_id,
            metadata={"model": "llama3.1:8b-instruct"},
        )
        return result
    except Exception as e:
        # Wrap with context; `from e` keeps the original traceback chained.
        raise Exception(f"LLM failed: {e}") from e
# ──────────────────────────────────────────────────────────────────────────
# Main Loop
# ──────────────────────────────────────────────────────────────────────────
async def execute_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
    """Run a claimed task through its registered handler.

    Dispatches on the task's "type" (falling back to the "general"
    handler), awaits it on the task's "content", reports completion to
    the brain, and returns a success/error dict.

    Args:
        task: Claimed task dict with "type", "content", and "id" keys.

    Returns:
        {"success": True, "result": ...} or {"success": False, "error": ...}.
    """
    kind = task.get("type", "general")
    body = task.get("content", "")
    tid = task.get("id")
    run_handler = self._handlers.get(kind, self._handlers["general"])
    try:
        logger.info(f"Executing task {tid}: {kind}")
        outcome = await run_handler(body)
        await self.brain.complete_task(tid, success=True, result=outcome)
        logger.info(f"Task {tid} completed")
        return {"success": True, "result": outcome}
    except Exception as exc:
        message = str(exc)
        logger.error(f"Task {tid} failed: {message}")
        await self.brain.complete_task(tid, success=False, error=message)
        return {"success": False, "error": message}
async def run_once(self) -> bool:
    """Claim and execute at most one task.

    Returns:
        True when a matching task was claimed and executed; False when
        the queue held nothing for this node's capabilities.
    """
    claimed = await self.brain.claim_task(self.capabilities, self.node_id)
    if not claimed:
        return False
    await self.execute_task(claimed)
    return True
async def run(self):
"""Main loop — continuously process tasks."""
logger.info(f"Worker {self.node_id} started")
logger.info(f"Capabilities: {self.capabilities}")
self.running = True
consecutive_empty = 0
while self.running:
try:
had_work = await self.run_once()
if had_work:
# Immediately check for more work
consecutive_empty = 0
@@ -331,11 +324,11 @@ class DistributedWorker:
# Sleep 0.5s, but up to 2s if consistently empty
sleep_time = min(0.5 + (consecutive_empty * 0.1), 2.0)
await asyncio.sleep(sleep_time)
except Exception as e:
logger.error(f"Worker error: {e}")
await asyncio.sleep(1)
def stop(self):
    """Stop the worker loop.

    Clears the running flag; the run() loop checks it each iteration, so
    the worker exits after finishing any in-flight task rather than
    being interrupted mid-task.
    """
    self.running = False
@@ -345,7 +338,7 @@ class DistributedWorker:
async def main():
"""CLI entry point for worker."""
import sys
# Allow capability overrides from CLI
if len(sys.argv) > 1:
caps = sys.argv[1].split(",")
@@ -354,12 +347,12 @@ async def main():
logger.info(f"Overriding capabilities: {caps}")
else:
worker = DistributedWorker()
try:
await worker.run()
except KeyboardInterrupt:
worker.stop()
print("\nWorker stopped.")
logger.info("Worker stopped.")
if __name__ == "__main__":