fix: replace silent exception swallowing with debug logging across tools

Add logger.debug() calls to 24 silent 'except ...: pass' blocks across 7 core
files, giving visibility into errors that were previously swallowed without a
trace. This makes it much easier to diagnose user-reported issues from debug
logs.

Files changed:
- tools/terminal_tool.py: 5 catches (stat, termios, fd close, cleanup)
- tools/delegate_tool.py: 7 catches + added logger (spinner, callbacks)
- tools/browser_tool.py: 5 catches (screenshot/recording cleanup, daemon kill)
- tools/code_execution_tool.py: 2 remaining catches (socket, server close)
- gateway/session.py: 2 catches (platform enum parse, temp file cleanup)
- agent/display.py: 2 catches + added logger (JSON parse in failure detect)
- agent/prompt_builder.py: 1 catch (skill description read)

Deliberately kept a silent pass for:
- ImportError checks for optional dependencies (terminal_tool.py)
- SystemExit/KeyboardInterrupt handlers
- Spinner _write catch (would spam on every frame when stdout closed)
- process_registry PID-alive check (canonical os.kill(pid,0) pattern)

Extends the pattern from PR #686 (@aydnOktay).
2026-03-10 06:59:20 -07:00
parent 60cba55d82
commit 0fdeffe6c4
7 changed files with 49 additions and 45 deletions
--- a/agent/display.py
+++ b/agent/display.py
@@ -5,6 +5,7 @@ Used by AIAgent._execute_tool_calls for CLI feedback.
 """

 import json
+import logging
 import os
 import sys
 import threading
@@ -14,6 +15,8 @@ import time
 _RED = "\033[31m"
 _RESET = "\033[0m"

+logger = logging.getLogger(__name__)
+

 # =========================================================================
 # Skin-aware helpers (lazy import to avoid circular deps)
@@ -362,7 +365,7 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
            if exit_code is not None and exit_code != 0:
                return True, f" [exit {exit_code}]"
        except (json.JSONDecodeError, TypeError, AttributeError):
-            pass
+            logger.debug("Could not parse terminal result as JSON for exit code check")
        return False, ""

    # Memory-specific: distinguish "full" from real errors
@@ -372,7 +375,7 @@ def _detect_tool_failure(tool_name: str, result: str | None) -> tuple[bool, str]
            if data.get("success") is False and "exceed the limit" in data.get("error", ""):
                return True, " [full]"
        except (json.JSONDecodeError, TypeError, AttributeError):
-            pass
+            logger.debug("Could not parse memory result as JSON for capacity check")

    # Generic heuristic for non-terminal tools
    lower = result[:500].lower()
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -159,8 +159,8 @@ def _read_skill_description(skill_file: Path, max_chars: int = 60) -> str:
            if len(desc) > max_chars:
                desc = desc[:max_chars - 3] + "..."
            return desc
-    except Exception:
-        pass
+    except Exception as e:
+        logger.debug("Failed to read skill description from %s: %s", skill_file, e)
    return ""


--- a/gateway/session.py
+++ b/gateway/session.py
@@ -272,8 +272,8 @@ class SessionEntry:
        if data.get("platform"):
            try:
                platform = Platform(data["platform"])
-            except ValueError:
-                pass
+            except ValueError as e:
+                logger.debug("Unknown platform value %r: %s", data["platform"], e)
        
        return cls(
            session_key=data["session_key"],
@@ -370,8 +370,8 @@ class SessionStore:
        except BaseException:
            try:
                os.unlink(tmp_path)
-            except OSError:
-                pass
+            except OSError as e:
+                logger.debug("Could not remove temp file %s: %s", tmp_path, e)
            raise
    
    def _generate_session_key(self, source: SessionSource) -> str:
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@@ -1615,10 +1615,10 @@ def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24):
            try:
                if f.stat().st_mtime < cutoff:
                    f.unlink()
-            except Exception:
-                pass
-    except Exception:
-        pass  # Non-critical — don't fail the screenshot operation
+            except Exception as e:
+                logger.debug("Failed to clean old screenshot %s: %s", f, e)
+    except Exception as e:
+        logger.debug("Screenshot cleanup error (non-critical): %s", e)


 def _cleanup_old_recordings(max_age_hours=72):
@@ -1634,10 +1634,10 @@ def _cleanup_old_recordings(max_age_hours=72):
            try:
                if f.stat().st_mtime < cutoff:
                    f.unlink()
-            except Exception:
-                pass
-    except Exception:
-        pass
+            except Exception as e:
+                logger.debug("Failed to clean old recording %s: %s", f, e)
+    except Exception as e:
+        logger.debug("Recording cleanup error (non-critical): %s", e)


 # ============================================================================
@@ -1749,7 +1749,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
                        os.kill(daemon_pid, signal.SIGTERM)
                        logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name)
                    except (ProcessLookupError, ValueError, PermissionError, OSError):
-                        pass
+                        logger.debug("Could not kill daemon pid for %s (already dead or inaccessible)", session_name)
                shutil.rmtree(socket_dir, ignore_errors=True)
        
        logger.debug("Removed task %s from active sessions", task_id)
--- a/tools/code_execution_tool.py
+++ b/tools/code_execution_tool.py
@@ -335,8 +335,8 @@ def _rpc_server_loop(
        if conn:
            try:
                conn.close()
-            except OSError:
-                pass
+            except OSError as e:
+                logger.debug("RPC conn close error: %s", e)


 # ---------------------------------------------------------------------------
@@ -550,8 +550,8 @@ def execute_code(
        # Cleanup temp dir and socket
        try:
            server_sock.close()
-        except Exception:
-            pass
+        except Exception as e:
+            logger.debug("Server socket close error: %s", e)
        try:
            import shutil
            shutil.rmtree(tmpdir, ignore_errors=True)
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -20,6 +20,7 @@ import contextlib
 import io
 import json
 import logging
+logger = logging.getLogger(__name__)
 import os
 import sys
 import time
@@ -107,8 +108,8 @@ def _build_child_progress_callback(task_index: int, parent_agent, task_count: in
                short = (preview[:55] + "...") if preview and len(preview) > 55 else (preview or "")
                try:
                    spinner.print_above(f" {prefix}├─ 💭 \"{short}\"")
-                except Exception:
-                    pass
+                except Exception as e:
+                    logger.debug("Spinner print_above failed: %s", e)
            # Don't relay thinking to gateway (too noisy for chat)
            return

@@ -129,8 +130,8 @@ def _build_child_progress_callback(task_index: int, parent_agent, task_count: in
                line += f"  \"{short}\""
            try:
                spinner.print_above(line)
-            except Exception:
-                pass
+            except Exception as e:
+                logger.debug("Spinner print_above failed: %s", e)

        if parent_cb:
            _batch.append(tool_name)
@@ -138,8 +139,8 @@ def _build_child_progress_callback(task_index: int, parent_agent, task_count: in
                summary = ", ".join(_batch)
                try:
                    parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
-                except Exception:
-                    pass
+                except Exception as e:
+                    logger.debug("Parent callback failed: %s", e)
                _batch.clear()

    def _flush():
@@ -148,8 +149,8 @@ def _build_child_progress_callback(task_index: int, parent_agent, task_count: in
            summary = ", ".join(_batch)
            try:
                parent_cb("subagent_progress", f"🔀 {prefix}{summary}")
-            except Exception:
-                pass
+            except Exception as e:
+                logger.debug("Parent callback flush failed: %s", e)
            _batch.clear()

    _callback._flush = _flush
@@ -241,8 +242,8 @@ def _run_single_child(
        if child_progress_cb and hasattr(child_progress_cb, '_flush'):
            try:
                child_progress_cb._flush()
-            except Exception:
-                pass
+            except Exception as e:
+                logger.debug("Progress callback flush failed: %s", e)

        duration = round(time.monotonic() - child_start, 2)

@@ -287,8 +288,8 @@ def _run_single_child(
        if hasattr(parent_agent, '_active_children'):
            try:
                parent_agent._active_children.remove(child)
-            except (ValueError, UnboundLocalError):
-                pass
+            except (ValueError, UnboundLocalError) as e:
+                logger.debug("Could not remove child from active_children: %s", e)


 def delegate_task(
@@ -425,8 +426,8 @@ def delegate_task(
                if spinner_ref and remaining > 0:
                    try:
                        spinner_ref.update_text(f"🔀 {remaining} task{'s' if remaining != 1 else ''} remaining")
-                    except Exception:
-                        pass
+                    except Exception as e:
+                        logger.debug("Spinner update_text failed: %s", e)

        # Restore stdout/stderr in case redirect_stdout race left them as devnull
        sys.stdout = _saved_stdout
--- a/tools/terminal_tool.py
+++ b/tools/terminal_tool.py
@@ -84,8 +84,8 @@ def _check_disk_usage_warning():
                if f.is_file():
                    try:
                        total_bytes += f.stat().st_size
-                    except OSError:
-                        pass
+                    except OSError as e:
+                        logger.debug("Could not stat file %s: %s", f, e)
        
        total_gb = total_bytes / (1024 ** 3)
        
@@ -231,13 +231,13 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
                try:
                    import termios as _termios
                    _termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
-                except Exception:
-                    pass
+                except Exception as e:
+                    logger.debug("Failed to restore terminal attributes: %s", e)
            if tty_fd is not None:
                try:
                    os.close(tty_fd)
-                except Exception:
-                    pass
+                except Exception as e:
+                    logger.debug("Failed to close tty fd: %s", e)
            result["done"] = True
    
    try:
@@ -690,8 +690,8 @@ def get_active_environments_info() -> Dict[str, Any]:
            try:
                size = sum(f.stat().st_size for f in Path(path).rglob('*') if f.is_file())
                total_size += size
-            except OSError:
-                pass
+            except OSError as e:
+                logger.debug("Could not stat path %s: %s", path, e)
    
    info["total_disk_usage_mb"] = round(total_size / (1024 * 1024), 2)
    return info
@@ -718,8 +718,8 @@ def cleanup_all_environments():
        try:
            shutil.rmtree(path, ignore_errors=True)
            logger.info("Removed orphaned: %s", path)
-        except OSError:
-            pass
+        except OSError as e:
+            logger.debug("Failed to remove orphaned path %s: %s", path, e)
    
    if cleaned > 0:
        logger.info("Cleaned %d environments", cleaned)