202 lines
5.7 KiB
Python
202 lines
5.7 KiB
Python
|
|
#!/usr/bin/env python3
"""
RCA (Root Cause Analysis) template generator for Ezra.
Creates structured RCA documents from incident parameters.

Epic: EZRA-SELF-001 / Phase 4 - Self-Monitoring & RCA
Author: Ezra (self-improvement)
"""
|
||
|
|
|
||
|
|
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Optional
|
||
|
|
|
||
|
|
|
||
|
|
class RCAGenerator:
    """Generate structured RCA (Root Cause Analysis) documents as Markdown.

    Each document is rendered from ``TEMPLATE`` and written to ``rca_dir`` as
    ``RCA-<number>-<slug>.md``. Numbers are allocated by scanning the
    directory for existing ``RCA-*.md`` files.
    """

    # Human-readable description for each supported severity code.
    SEVERITY_LEVELS = {
        "P0": "Critical - Service down, data loss risk",
        "P1": "High - Major feature broken, workaround exists",
        "P2": "Medium - Feature degraded, minor impact",
        "P3": "Low - Cosmetic, minor inconvenience",
    }

    # Markdown skeleton; the placeholders are filled in by ``generate``.
    TEMPLATE = """# RCA-{number}: {title}

## Summary
| Field | Value |
|-------|-------|
| **Date** | {date} |
| **Severity** | {severity} - {severity_desc} |
| **Duration** | {duration} |
| **Affected** | {affected} |
| **Status** | {status} |

## Timeline
{timeline}

## Root Cause
{root_cause}

## Impact
{impact}

## Resolution
{resolution}

## 5-Whys Analysis
{five_whys}

## Action Items
{action_items}

## Lessons Learned
{lessons}

## Prevention
{prevention}

---
Generated by: Ezra RCA Generator
Date: {generated}
"""

    # Characters stripped from a title when deriving the file-name slug.
    # Compiled once here instead of importing ``re`` on every generate() call.
    _SLUG_INVALID = re.compile(r"[^a-z0-9-]")
    # Maximum number of slug characters kept in a generated file name.
    _SLUG_MAX_LEN = 40

    def __init__(self, rca_dir: Optional[str] = None):
        """Create the generator and make sure the output directory exists.

        Args:
            rca_dir: Directory that holds the RCA files. When falsy, the
                default ``/root/wizards/ezra/reports/rca`` is used. The
                directory (and any missing parents) is created.
        """
        self.rca_dir = Path(rca_dir or "/root/wizards/ezra/reports/rca")
        self.rca_dir.mkdir(parents=True, exist_ok=True)

    def _next_number(self) -> int:
        """Return one more than the highest RCA number found in ``rca_dir``.

        The number is the second ``-``-separated field of the file stem
        (``RCA-<number>-...``). Returns 1 when no numbered file exists.
        """
        numbers = []
        for path in self.rca_dir.glob("RCA-*.md"):
            try:
                numbers.append(int(path.stem.split("-")[1]))
            except (IndexError, ValueError):
                # Not an "RCA-<int>-..." name (e.g. "RCA-draft.md"); such
                # files simply do not take part in numbering.
                continue
        return max(numbers, default=0) + 1

    @classmethod
    def _slugify(cls, title: str) -> str:
        """Turn *title* into the lowercase ``a-z0-9-`` slug used in file names."""
        hyphenated = title.lower().replace(" ", "-")
        return cls._SLUG_INVALID.sub("", hyphenated)[: cls._SLUG_MAX_LEN]

    @staticmethod
    def _bullets(items: Optional[list[str]]) -> str:
        """Render *items* as a Markdown bullet list, or a single TBD bullet."""
        return "\n".join(f"- {item}" for item in (items or ["TBD"]))

    def generate(
        self,
        title: str,
        severity: str = "P2",
        duration: str = "Unknown",
        affected: str = "Ezra wizard house",
        root_cause: str = "Under investigation",
        impact: str = "TBD",
        resolution: str = "TBD",
        timeline: Optional[list[dict]] = None,
        five_whys: Optional[list[str]] = None,
        action_items: Optional[list[dict]] = None,
        lessons: Optional[list[str]] = None,
        prevention: Optional[list[str]] = None,
        status: str = "Open",
        number: Optional[int] = None,
    ) -> tuple[str, Path]:
        """Render an RCA document and write it to ``rca_dir``.

        Args:
            title: Incident title; also used (slugified) in the file name.
            severity: Severity code; its description is looked up in
                ``SEVERITY_LEVELS`` and falls back to ``"Unknown"``.
            duration: Free-text incident duration.
            affected: Free-text description of what was affected.
            root_cause: Free-text root cause.
            impact: Free-text impact description.
            resolution: Free-text resolution description.
            timeline: Dicts with optional ``time`` and ``event`` keys.
            five_whys: Answers for the 5-Whys section, in order.
            action_items: Dicts with optional ``priority``, ``action`` and
                ``owner`` keys.
            lessons: Bullet points for "Lessons Learned".
            prevention: Bullet points for "Prevention".
            status: Free-text status shown in the summary table.
            number: Explicit RCA number; when ``None`` the next free number
                is taken from the files already present in ``rca_dir``.

        Returns:
            ``(content, file_path)``: the rendered Markdown and the path it
            was written to. An existing file at that path is overwritten.
        """
        if number is None:
            number = self._next_number()

        if timeline:
            timeline_str = "\n".join(
                f"- **{entry.get('time', '??:??')}** - "
                f"{entry.get('event', 'Unknown event')}"
                for entry in timeline
            )
        else:
            timeline_str = "- TBD - Add timeline entries"

        if five_whys:
            five_whys_str = "\n".join(
                f"{index}. **Why?** {why}"
                for index, why in enumerate(five_whys, start=1)
            )
        else:
            five_whys_str = "1. **Why?** TBD\n2. **Why?** TBD\n3. **Why?** TBD"

        if action_items:
            action_items_str = "\n".join(
                f"- [ ] **[{item.get('priority', 'P2')}]** "
                f"{item.get('action', 'TBD')} "
                f"(Owner: {item.get('owner', 'Ezra')})"
                for item in action_items
            )
        else:
            action_items_str = "- [ ] **[P2]** Add action items (Owner: Ezra)"

        # One timestamp for both fields so "Date" and the footer cannot
        # disagree when generation straddles midnight.
        now = datetime.now()

        content = self.TEMPLATE.format(
            number=number,
            title=title,
            date=now.strftime("%Y-%m-%d"),
            severity=severity,
            severity_desc=self.SEVERITY_LEVELS.get(severity, "Unknown"),
            duration=duration,
            affected=affected,
            status=status,
            root_cause=root_cause,
            impact=impact,
            resolution=resolution,
            timeline=timeline_str,
            five_whys=five_whys_str,
            action_items=action_items_str,
            lessons=self._bullets(lessons),
            prevention=self._bullets(prevention),
            generated=now.isoformat(),
        )

        file_path = self.rca_dir / f"RCA-{number}-{self._slugify(title)}.md"
        # Explicit UTF-8 so non-ASCII titles/text do not depend on the locale.
        file_path.write_text(content, encoding="utf-8")

        return content, file_path

    def list_rcas(self) -> list[dict]:
        """List the RCA files in ``rca_dir``, sorted by path.

        Returns:
            One dict per ``RCA-*.md`` file with the keys ``file`` (file
            name), ``title`` (first line without its leading ``"# "``),
            ``size`` (bytes) and ``modified`` (ISO-8601 mtime).
        """
        rcas = []
        for path in sorted(self.rca_dir.glob("RCA-*.md")):
            info = path.stat()
            # Only the heading is needed, so read a single line rather than
            # the whole document; an empty file just yields "".
            with path.open(encoding="utf-8", errors="replace") as handle:
                first_line = handle.readline().rstrip("\n")
            rcas.append({
                "file": path.name,
                # Strip only the heading marker; a "# " inside the title
                # itself must survive.
                "title": first_line.removeprefix("# "),
                "size": info.st_size,
                "modified": datetime.fromtimestamp(info.st_mtime).isoformat(),
            })
        return rcas
|
||
|
|
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Demo run: build one sample RCA with the default generator settings,
    # then echo where it was written followed by the rendered document.
    generator = RCAGenerator()

    sample_timeline = [
        {"time": "10:00", "event": "Issue detected"},
        {"time": "10:15", "event": "Investigation started"},
        {"time": "10:30", "event": "Root cause identified and fixed"},
    ]
    sample_whys = [
        "The API returned 401",
        "Token was expired",
        "No token refresh automation existed",
    ]
    sample_actions = [
        {"priority": "P1", "action": "Implement token auto-refresh", "owner": "Ezra"},
    ]

    document, destination = generator.generate(
        title="Example RCA",
        severity="P2",
        duration="30 minutes",
        root_cause="Example root cause for testing",
        timeline=sample_timeline,
        five_whys=sample_whys,
        action_items=sample_actions,
        status="Resolved",
    )

    print(f"Generated: {destination}")
    print(document)
|