Files
Timmy-time-dashboard/src/infrastructure/error_capture.py
2026-03-05 19:45:38 -05:00

236 lines
6.8 KiB
Python

"""Centralized error capture with automatic bug report creation.
Catches errors from anywhere in the system, deduplicates them, logs them
to the event log, and creates bug report tasks in the task queue.
Usage:
    from infrastructure.error_capture import capture_error

    try:
        risky_operation()
    except Exception as exc:
        capture_error(exc, source="my_module", context={"request": "/api/foo"})
"""
import hashlib
import logging
import traceback
from datetime import datetime, timedelta, timezone
from typing import Optional
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# In-memory dedup cache: error hash -> timezone-aware UTC timestamp stored by
# _is_duplicate(). It is a plain module-level dict, so it lives only as long
# as the module stays loaded.
_dedup_cache: dict[str, datetime] = {}
def _stack_hash(exc: Exception) -> str:
    """Return a short, stable fingerprint for an exception.

    The fingerprint is built from the exception's type name followed by the
    formatted traceback entries that begin with ``File `` once stripped.
    The exception's message line is left out, so the same bug raised with
    different runtime data still produces the same hash.

    Args:
        exc: The exception to fingerprint; its ``__traceback__`` is used.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the fingerprint.
    """
    formatted = traceback.format_exception(type(exc), exc, exc.__traceback__)
    stripped_entries = (entry.strip() for entry in formatted)
    # Keep only the frame-location entries; everything else is discarded.
    locations = [entry for entry in stripped_entries if entry.startswith("File ")]
    fingerprint = "\n".join([type(exc).__name__, *locations])
    digest = hashlib.sha256(fingerprint.encode()).hexdigest()
    return digest[:16]
def _is_duplicate(error_hash: str) -> bool:
    """Report whether ``error_hash`` was already seen inside the dedup window.

    The window length comes from ``settings.error_dedup_window_seconds``.
    When the hash is not a duplicate, it is stamped with the current UTC
    time and entries older than twice the window are dropped from the cache.

    Args:
        error_hash: Fingerprint of the error being checked.

    Returns:
        True if the hash was recorded less than one window ago, else False.
    """
    from config import settings

    now = datetime.now(timezone.utc)
    window = timedelta(seconds=settings.error_dedup_window_seconds)

    previous = _dedup_cache.get(error_hash)
    if previous is not None and now - previous < window:
        # Seen recently: leave the stored timestamp untouched.
        return True

    _dedup_cache[error_hash] = now

    # Prune entries that are well outside the window.
    stale_before = now - window * 2
    stale_keys = [key for key, seen in _dedup_cache.items() if seen < stale_before]
    for key in stale_keys:
        del _dedup_cache[key]
    return False
def _get_git_context() -> dict:
    """Get the current git branch and short commit hash for the bug report.

    Runs ``git`` in ``settings.repo_root`` with a 5 second timeout per
    command. Any field that cannot be determined is reported as
    ``"unknown"``: this covers a raised exception (import failure, git
    missing, timeout), a non-zero git exit status, and empty output.

    Returns:
        A dict with the string keys ``"branch"`` and ``"commit"``; both
        values are always non-empty strings.
    """
    try:
        import subprocess

        from config import settings

        def _git_output(*args: str) -> str:
            """Run one git command and return its stripped stdout, or "unknown"."""
            result = subprocess.run(
                ["git", *args],
                capture_output=True,
                text=True,
                timeout=5,
                cwd=settings.repo_root,
            )
            # A failed command may still write to stdout, so do not trust the
            # output unless git reported success.
            if result.returncode != 0:
                return "unknown"
            # Successful but empty output must not leak into the report as "".
            return result.stdout.strip() or "unknown"

        return {
            "branch": _git_output("branch", "--show-current"),
            "commit": _git_output("rev-parse", "--short", "HEAD"),
        }
    except Exception:
        return {"branch": "unknown", "commit": "unknown"}
def capture_error(
    exc: Exception,
    source: str = "unknown",
    context: Optional[dict] = None,
) -> Optional[str]:
    """Capture an error and, unless suppressed, file a bug report for it.

    The error is fingerprinted and deduplicated, then reported through four
    independent, best-effort steps: the event log, a bug-report task in the
    task queue, a push notification, and the session logger. A failure in
    any one step is logged at debug level and does not stop the others.

    Args:
        exc: The exception to capture.
        source: Module/component where the error occurred.
        context: Optional dict of extra context (request path, etc.); rendered
            as ``key=value`` pairs in the bug report description.

    Returns:
        ID of the created bug report task, or None if error feedback is
        disabled, the error was deduplicated, or task creation failed.
    """
    from config import settings

    if not settings.error_feedback_enabled:
        return None

    error_hash = _stack_hash(exc)
    if _is_duplicate(error_hash):
        logger.debug("Duplicate error suppressed: %s (hash=%s)", exc, error_hash)
        return None

    error_type = type(exc).__name__

    # Format the stack trace
    tb_str = "".join(
        traceback.format_exception(type(exc), exc, exc.__traceback__)
    )

    # Extract file/line from the last traceback entry in the tb_next chain.
    tb_obj = exc.__traceback__
    affected_file = "unknown"
    affected_line = 0
    while tb_obj and tb_obj.tb_next:
        tb_obj = tb_obj.tb_next
    if tb_obj:
        affected_file = tb_obj.tb_frame.f_code.co_filename
        affected_line = tb_obj.tb_lineno

    git_ctx = _get_git_context()

    # 1. Log to event_log
    try:
        from swarm.event_log import EventType, log_event

        log_event(
            EventType.ERROR_CAPTURED,
            source=source,
            data={
                "error_type": error_type,
                "message": str(exc)[:500],
                "hash": error_hash,
                "file": affected_file,
                "line": affected_line,
                "git_branch": git_ctx.get("branch", ""),
                "git_commit": git_ctx.get("commit", ""),
            },
        )
    except Exception as log_exc:
        logger.debug("Failed to log error event: %s", log_exc)

    # 2. Create bug report task
    task_id = None
    try:
        from swarm.task_queue.models import create_task

        title = f"[BUG] {error_type}: {str(exc)[:80]}"
        description_parts = [
            f"**Error:** {error_type}: {str(exc)}",
            f"**Source:** {source}",
            f"**File:** {affected_file}:{affected_line}",
            f"**Git:** {git_ctx.get('branch', '?')} @ {git_ctx.get('commit', '?')}",
            f"**Time:** {datetime.now(timezone.utc).isoformat()}",
            f"**Hash:** {error_hash}",
        ]
        if context:
            ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
            description_parts.append(f"**Context:** {ctx_str}")
        # Cap the embedded trace so the description stays bounded.
        description_parts.append(f"\n**Stack Trace:**\n```\n{tb_str[:2000]}\n```")

        task = create_task(
            title=title,
            description="\n".join(description_parts),
            assigned_to="default",
            created_by="system",
            priority="normal",
            requires_approval=False,
            auto_approve=True,
            task_type="bug_report",
        )
        task_id = task.id

        # Log the creation event. This has its own handler so that a logging
        # failure cannot discard the task_id obtained above.
        try:
            from swarm.event_log import EventType, log_event

            log_event(
                EventType.BUG_REPORT_CREATED,
                source=source,
                task_id=task_id,
                data={
                    "error_hash": error_hash,
                    "title": title[:100],
                },
            )
        except Exception as event_exc:
            logger.debug("Failed to log bug report creation event: %s", event_exc)
    except Exception as task_exc:
        logger.debug("Failed to create bug report task: %s", task_exc)

    # 3. Send notification
    # NOTE(review): this is sent even when task creation above failed
    # (task_id is None), although the title says a report was filed. Confirm
    # that is intended.
    try:
        from infrastructure.notifications.push import notifier

        notifier.notify(
            title="Bug Report Filed",
            message=f"{error_type} in {source}: {str(exc)[:80]}",
            category="system",
        )
    except Exception as notify_exc:
        logger.debug("Failed to send error notification: %s", notify_exc)

    # 4. Record in session logger
    try:
        from timmy.session_logger import get_session_logger

        session_logger = get_session_logger()
        session_logger.record_error(
            error=f"{error_type}: {str(exc)}",
            context=source,
        )
    except Exception as session_exc:
        logger.debug("Failed to record error in session logger: %s", session_exc)

    return task_id