forked from Rockachopa/Timmy-time-dashboard
Compare commits
1 Commits
main
...
kimi/issue
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
66dfb353d7 |
@@ -330,13 +330,6 @@ class Settings(BaseSettings):
|
|||||||
autoresearch_max_iterations: int = 100
|
autoresearch_max_iterations: int = 100
|
||||||
autoresearch_metric: str = "val_bpb" # metric to optimise (lower = better)
|
autoresearch_metric: str = "val_bpb" # metric to optimise (lower = better)
|
||||||
|
|
||||||
# ── Weekly Narrative Summary ───────────────────────────────────────
|
|
||||||
# Generates a human-readable weekly summary of development activity.
|
|
||||||
# Disabling this will stop the weekly narrative generation.
|
|
||||||
weekly_narrative_enabled: bool = True
|
|
||||||
weekly_narrative_lookback_days: int = 7
|
|
||||||
weekly_narrative_output_dir: str = ".loop"
|
|
||||||
|
|
||||||
# ── Local Hands (Shell + Git) ──────────────────────────────────────
|
# ── Local Hands (Shell + Git) ──────────────────────────────────────
|
||||||
# Enable local shell/git execution hands.
|
# Enable local shell/git execution hands.
|
||||||
hands_shell_enabled: bool = True
|
hands_shell_enabled: bool = True
|
||||||
|
|||||||
@@ -44,6 +44,7 @@ from dashboard.routes.mobile import router as mobile_router
|
|||||||
from dashboard.routes.models import api_router as models_api_router
|
from dashboard.routes.models import api_router as models_api_router
|
||||||
from dashboard.routes.models import router as models_router
|
from dashboard.routes.models import router as models_router
|
||||||
from dashboard.routes.quests import router as quests_router
|
from dashboard.routes.quests import router as quests_router
|
||||||
|
from dashboard.routes.scorecards import router as scorecards_router
|
||||||
from dashboard.routes.spark import router as spark_router
|
from dashboard.routes.spark import router as spark_router
|
||||||
from dashboard.routes.system import router as system_router
|
from dashboard.routes.system import router as system_router
|
||||||
from dashboard.routes.tasks import router as tasks_router
|
from dashboard.routes.tasks import router as tasks_router
|
||||||
@@ -629,6 +630,7 @@ app.include_router(matrix_router)
|
|||||||
app.include_router(tower_router)
|
app.include_router(tower_router)
|
||||||
app.include_router(daily_run_router)
|
app.include_router(daily_run_router)
|
||||||
app.include_router(quests_router)
|
app.include_router(quests_router)
|
||||||
|
app.include_router(scorecards_router)
|
||||||
|
|
||||||
|
|
||||||
@app.websocket("/ws")
|
@app.websocket("/ws")
|
||||||
|
|||||||
353
src/dashboard/routes/scorecards.py
Normal file
353
src/dashboard/routes/scorecards.py
Normal file
@@ -0,0 +1,353 @@
|
|||||||
|
"""Agent scorecard routes — API endpoints for generating and viewing scorecards."""
|
||||||
|
|
||||||
|
from __future__ import annotations

import logging
from datetime import UTC, datetime

from fastapi import APIRouter, Query, Request
from fastapi.responses import HTMLResponse, JSONResponse

from dashboard.services.scorecard_service import (
    PeriodType,
    generate_all_scorecards,
    generate_scorecard,
    get_tracked_agents,
)
from dashboard.templating import templates
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
router = APIRouter(prefix="/scorecards", tags=["scorecards"])
|
||||||
|
|
||||||
|
|
||||||
|
def _format_period_label(period_type: PeriodType) -> str:
    """Return the display label for a period type ("Daily" or "Weekly")."""
    if period_type == PeriodType.daily:
        return "Daily"
    return "Weekly"
|
|
||||||
|
@router.get("/api/agents")
|
||||||
|
async def list_tracked_agents() -> dict[str, list[str]]:
|
||||||
|
"""Return the list of tracked agent IDs.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with "agents" key containing list of agent IDs
|
||||||
|
"""
|
||||||
|
return {"agents": get_tracked_agents()}
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api/{agent_id}")
|
||||||
|
async def get_agent_scorecard(
|
||||||
|
agent_id: str,
|
||||||
|
period: str = Query(default="daily", description="Period type: 'daily' or 'weekly'"),
|
||||||
|
) -> JSONResponse:
|
||||||
|
"""Generate a scorecard for a specific agent.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
agent_id: The agent ID (e.g., 'kimi', 'claude')
|
||||||
|
period: 'daily' or 'weekly' (default: daily)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
JSON response with scorecard data
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
period_type = PeriodType(period.lower())
|
||||||
|
except ValueError:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=400,
|
||||||
|
content={"error": f"Invalid period '{period}'. Use 'daily' or 'weekly'."},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
scorecard = generate_scorecard(agent_id, period_type)
|
||||||
|
|
||||||
|
if scorecard is None:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=404,
|
||||||
|
content={"error": f"No scorecard found for agent '{agent_id}'"},
|
||||||
|
)
|
||||||
|
|
||||||
|
return JSONResponse(content=scorecard.to_dict())
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Failed to generate scorecard for %s: %s", agent_id, exc)
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=500,
|
||||||
|
content={"error": f"Failed to generate scorecard: {str(exc)}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/api")
|
||||||
|
async def get_all_scorecards(
|
||||||
|
period: str = Query(default="daily", description="Period type: 'daily' or 'weekly'"),
|
||||||
|
) -> JSONResponse:
|
||||||
|
"""Generate scorecards for all tracked agents.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
period: 'daily' or 'weekly' (default: daily)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
JSON response with list of scorecard data
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
period_type = PeriodType(period.lower())
|
||||||
|
except ValueError:
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=400,
|
||||||
|
content={"error": f"Invalid period '{period}'. Use 'daily' or 'weekly'."},
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
scorecards = generate_all_scorecards(period_type)
|
||||||
|
return JSONResponse(
|
||||||
|
content={
|
||||||
|
"period": period_type.value,
|
||||||
|
"scorecards": [s.to_dict() for s in scorecards],
|
||||||
|
"count": len(scorecards),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Failed to generate scorecards: %s", exc)
|
||||||
|
return JSONResponse(
|
||||||
|
status_code=500,
|
||||||
|
content={"error": f"Failed to generate scorecards: {str(exc)}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("", response_class=HTMLResponse)
|
||||||
|
async def scorecards_page(request: Request) -> HTMLResponse:
|
||||||
|
"""Render the scorecards dashboard page.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
HTML page with scorecard interface
|
||||||
|
"""
|
||||||
|
agents = get_tracked_agents()
|
||||||
|
return templates.TemplateResponse(
|
||||||
|
request,
|
||||||
|
"scorecards.html",
|
||||||
|
{
|
||||||
|
"agents": agents,
|
||||||
|
"periods": ["daily", "weekly"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/panel/{agent_id}", response_class=HTMLResponse)
|
||||||
|
async def agent_scorecard_panel(
|
||||||
|
request: Request,
|
||||||
|
agent_id: str,
|
||||||
|
period: str = Query(default="daily"),
|
||||||
|
) -> HTMLResponse:
|
||||||
|
"""Render an individual agent scorecard panel (for HTMX).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request: The request object
|
||||||
|
agent_id: The agent ID
|
||||||
|
period: 'daily' or 'weekly'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
HTML panel with scorecard content
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
period_type = PeriodType(period.lower())
|
||||||
|
except ValueError:
|
||||||
|
period_type = PeriodType.daily
|
||||||
|
|
||||||
|
try:
|
||||||
|
scorecard = generate_scorecard(agent_id, period_type)
|
||||||
|
|
||||||
|
if scorecard is None:
|
||||||
|
return HTMLResponse(
|
||||||
|
content=f"""
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<h5 class="card-title">{agent_id.title()}</h5>
|
||||||
|
<p class="text-muted">No activity recorded for this period.</p>
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
status_code=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
data = scorecard.to_dict()
|
||||||
|
|
||||||
|
# Build patterns HTML
|
||||||
|
patterns_html = ""
|
||||||
|
if data["patterns"]:
|
||||||
|
patterns_list = "".join([f"<li>{p}</li>" for p in data["patterns"]])
|
||||||
|
patterns_html = f"""
|
||||||
|
<div class="mt-3">
|
||||||
|
<h6>Patterns</h6>
|
||||||
|
<ul class="list-unstyled text-info">
|
||||||
|
{patterns_list}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Build bullets HTML
|
||||||
|
bullets_html = "".join([f"<li>{b}</li>" for b in data["narrative_bullets"]])
|
||||||
|
|
||||||
|
# Build metrics summary
|
||||||
|
metrics = data["metrics"]
|
||||||
|
|
||||||
|
html_content = f"""
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-header d-flex justify-content-between align-items-center">
|
||||||
|
<h5 class="card-title mb-0">{agent_id.title()}</h5>
|
||||||
|
<span class="badge bg-secondary">{_format_period_label(period_type)}</span>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<ul class="list-unstyled mb-3">
|
||||||
|
{bullets_html}
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<div class="row text-center small">
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">PRs</div>
|
||||||
|
<div class="fw-bold">{metrics["prs_opened"]}/{metrics["prs_merged"]}</div>
|
||||||
|
<div class="text-muted" style="font-size: 0.75rem;">
|
||||||
|
{int(metrics["pr_merge_rate"] * 100)}% merged
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Issues</div>
|
||||||
|
<div class="fw-bold">{metrics["issues_touched"]}</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Tests</div>
|
||||||
|
<div class="fw-bold">{metrics["tests_affected"]}</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Tokens</div>
|
||||||
|
<div class="fw-bold {"text-success" if metrics["token_net"] >= 0 else "text-danger"}">
|
||||||
|
{"+" if metrics["token_net"] > 0 else ""}{metrics["token_net"]}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{patterns_html}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
return HTMLResponse(content=html_content)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Failed to render scorecard panel for %s: %s", agent_id, exc)
|
||||||
|
return HTMLResponse(
|
||||||
|
content=f"""
|
||||||
|
<div class="card mc-panel border-danger">
|
||||||
|
<h5 class="card-title">{agent_id.title()}</h5>
|
||||||
|
<p class="text-danger">Error loading scorecard: {str(exc)}</p>
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
status_code=200,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/all/panels", response_class=HTMLResponse)
|
||||||
|
async def all_scorecard_panels(
|
||||||
|
request: Request,
|
||||||
|
period: str = Query(default="daily"),
|
||||||
|
) -> HTMLResponse:
|
||||||
|
"""Render all agent scorecard panels (for HTMX).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
request: The request object
|
||||||
|
period: 'daily' or 'weekly'
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
HTML with all scorecard panels
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
period_type = PeriodType(period.lower())
|
||||||
|
except ValueError:
|
||||||
|
period_type = PeriodType.daily
|
||||||
|
|
||||||
|
try:
|
||||||
|
scorecards = generate_all_scorecards(period_type)
|
||||||
|
|
||||||
|
panels: list[str] = []
|
||||||
|
for scorecard in scorecards:
|
||||||
|
data = scorecard.to_dict()
|
||||||
|
|
||||||
|
# Build patterns HTML
|
||||||
|
patterns_html = ""
|
||||||
|
if data["patterns"]:
|
||||||
|
patterns_list = "".join([f"<li>{p}</li>" for p in data["patterns"]])
|
||||||
|
patterns_html = f"""
|
||||||
|
<div class="mt-3">
|
||||||
|
<h6>Patterns</h6>
|
||||||
|
<ul class="list-unstyled text-info">
|
||||||
|
{patterns_list}
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Build bullets HTML
|
||||||
|
bullets_html = "".join([f"<li>{b}</li>" for b in data["narrative_bullets"]])
|
||||||
|
metrics = data["metrics"]
|
||||||
|
|
||||||
|
panel_html = f"""
|
||||||
|
<div class="col-md-6 col-lg-4 mb-3">
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-header d-flex justify-content-between align-items-center">
|
||||||
|
<h5 class="card-title mb-0">{scorecard.agent_id.title()}</h5>
|
||||||
|
<span class="badge bg-secondary">{_format_period_label(period_type)}</span>
|
||||||
|
</div>
|
||||||
|
<div class="card-body">
|
||||||
|
<ul class="list-unstyled mb-3">
|
||||||
|
{bullets_html}
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
<div class="row text-center small">
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">PRs</div>
|
||||||
|
<div class="fw-bold">{metrics["prs_opened"]}/{metrics["prs_merged"]}</div>
|
||||||
|
<div class="text-muted" style="font-size: 0.75rem;">
|
||||||
|
{int(metrics["pr_merge_rate"] * 100)}% merged
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Issues</div>
|
||||||
|
<div class="fw-bold">{metrics["issues_touched"]}</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Tests</div>
|
||||||
|
<div class="fw-bold">{metrics["tests_affected"]}</div>
|
||||||
|
</div>
|
||||||
|
<div class="col">
|
||||||
|
<div class="text-muted">Tokens</div>
|
||||||
|
<div class="fw-bold {"text-success" if metrics["token_net"] >= 0 else "text-danger"}">
|
||||||
|
{"+" if metrics["token_net"] > 0 else ""}{metrics["token_net"]}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{patterns_html}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
panels.append(panel_html)
|
||||||
|
|
||||||
|
html_content = f"""
|
||||||
|
<div class="row">
|
||||||
|
{"".join(panels)}
|
||||||
|
</div>
|
||||||
|
<div class="text-muted small mt-2">
|
||||||
|
Generated: {datetime.now().strftime("%Y-%m-%d %H:%M:%S UTC")}
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
return HTMLResponse(content=html_content)
|
||||||
|
|
||||||
|
except Exception as exc:
|
||||||
|
logger.error("Failed to render all scorecard panels: %s", exc)
|
||||||
|
return HTMLResponse(
|
||||||
|
content=f"""
|
||||||
|
<div class="alert alert-danger">
|
||||||
|
Error loading scorecards: {str(exc)}
|
||||||
|
</div>
|
||||||
|
""",
|
||||||
|
status_code=200,
|
||||||
|
)
|
||||||
17
src/dashboard/services/__init__.py
Normal file
17
src/dashboard/services/__init__.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
"""Dashboard services for business logic."""
|
||||||
|
|
||||||
|
from dashboard.services.scorecard_service import (
|
||||||
|
PeriodType,
|
||||||
|
ScorecardSummary,
|
||||||
|
generate_all_scorecards,
|
||||||
|
generate_scorecard,
|
||||||
|
get_tracked_agents,
|
||||||
|
)
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"PeriodType",
|
||||||
|
"ScorecardSummary",
|
||||||
|
"generate_all_scorecards",
|
||||||
|
"generate_scorecard",
|
||||||
|
"get_tracked_agents",
|
||||||
|
]
|
||||||
515
src/dashboard/services/scorecard_service.py
Normal file
515
src/dashboard/services/scorecard_service.py
Normal file
@@ -0,0 +1,515 @@
|
|||||||
|
"""Agent scorecard service — track and summarize agent performance.
|
||||||
|
|
||||||
|
Generates daily/weekly scorecards showing:
|
||||||
|
- Issues touched, PRs opened/merged
|
||||||
|
- Tests affected, tokens earned/spent
|
||||||
|
- Pattern highlights (merge rate, activity quality)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
from enum import StrEnum
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from infrastructure.events.bus import Event, get_event_bus
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Bot/agent usernames to track
|
||||||
|
TRACKED_AGENTS = frozenset({"hermes", "kimi", "manus", "claude", "gemini"})
|
||||||
|
|
||||||
|
|
||||||
|
class PeriodType(StrEnum):
    """Scorecard reporting window; members compare equal to their string values."""

    daily = "daily"
    weekly = "weekly"
|
||||||
|
@dataclass
class AgentMetrics:
    """Raw metrics collected for an agent over a period."""

    agent_id: str
    # Sets deduplicate repeated events touching the same issue/PR/test file.
    issues_touched: set[int] = field(default_factory=set)
    prs_opened: set[int] = field(default_factory=set)
    prs_merged: set[int] = field(default_factory=set)
    tests_affected: set[str] = field(default_factory=set)
    tokens_earned: int = 0
    tokens_spent: int = 0
    commits: int = 0
    comments: int = 0

    @property
    def pr_merge_rate(self) -> float:
        """Calculate PR merge rate (0.0 - 1.0); 0.0 when no PRs were opened."""
        opened_count = len(self.prs_opened)
        return len(self.prs_merged) / opened_count if opened_count else 0.0
|
||||||
|
@dataclass
class ScorecardSummary:
    """A generated scorecard with narrative summary."""

    agent_id: str
    period_type: PeriodType
    period_start: datetime
    period_end: datetime
    metrics: AgentMetrics
    narrative_bullets: list[str] = field(default_factory=list)
    patterns: list[str] = field(default_factory=list)

    @property
    def tests_affected(self) -> set[str]:
        """Alias for metrics.tests_affected."""
        return self.metrics.tests_affected

    def to_dict(self) -> dict[str, Any]:
        """Convert scorecard to dictionary for JSON serialization."""
        m = self.metrics
        # Collections are reported as counts so the payload stays JSON-friendly.
        metric_counts = {
            "issues_touched": len(m.issues_touched),
            "prs_opened": len(m.prs_opened),
            "prs_merged": len(m.prs_merged),
            "pr_merge_rate": round(m.pr_merge_rate, 2),
            "tests_affected": len(self.tests_affected),
            "commits": m.commits,
            "comments": m.comments,
            "tokens_earned": m.tokens_earned,
            "tokens_spent": m.tokens_spent,
            "token_net": m.tokens_earned - m.tokens_spent,
        }
        return {
            "agent_id": self.agent_id,
            "period_type": self.period_type.value,
            "period_start": self.period_start.isoformat(),
            "period_end": self.period_end.isoformat(),
            "metrics": metric_counts,
            "narrative_bullets": self.narrative_bullets,
            "patterns": self.patterns,
        }
|
|
||||||
|
def _get_period_bounds(
    period_type: PeriodType, reference_date: datetime | None = None
) -> tuple[datetime, datetime]:
    """Calculate start and end timestamps for a period.

    Args:
        period_type: daily or weekly
        reference_date: The date to calculate from (defaults to now)

    Returns:
        Tuple of (period_start, period_end) in UTC
    """
    anchor = reference_date if reference_date is not None else datetime.now(UTC)
    # Truncate to midnight so periods always align on day boundaries.
    period_end = anchor.replace(hour=0, minute=0, second=0, microsecond=0)
    span_days = 1 if period_type == PeriodType.daily else 7
    return period_end - timedelta(days=span_days), period_end
|
|
||||||
|
def _collect_events_for_period(
    start: datetime, end: datetime, agent_id: str | None = None
) -> list[Event]:
    """Collect events from the event bus for a time period.

    Args:
        start: Period start time
        end: Period end time
        agent_id: Optional agent filter (passed to replay as the event source)

    Returns:
        List of matching events
    """
    bus = get_event_bus()
    events: list[Event] = []

    # Query persisted events for relevant types
    event_types = [
        "gitea.push",
        "gitea.issue.opened",
        "gitea.issue.comment",
        "gitea.pull_request",
        "agent.task.completed",
        "test.execution",
    ]

    for event_type in event_types:
        try:
            # NOTE(review): replay is capped at 1000 events per type — on a
            # busy bus, older events in the period may be silently missed.
            type_events = bus.replay(
                event_type=event_type,
                source=agent_id,
                limit=1000,
            )
            events.extend(type_events)
        except Exception as exc:
            # Best-effort: a failing event type must not sink the whole report.
            logger.debug("Failed to replay events for %s: %s", event_type, exc)

    # Filter by timestamp; events with missing/unparsable timestamps are dropped.
    filtered = []
    for event in events:
        try:
            # Normalize a trailing "Z" to an ISO-8601 offset for fromisoformat.
            event_time = datetime.fromisoformat(event.timestamp.replace("Z", "+00:00"))
            if start <= event_time < end:
                filtered.append(event)
        except (ValueError, AttributeError):
            continue

    return filtered
|
|
||||||
|
def _extract_actor_from_event(event: Event) -> str:
|
||||||
|
"""Extract the actor/agent from an event."""
|
||||||
|
# Try data fields first
|
||||||
|
if "actor" in event.data:
|
||||||
|
return event.data["actor"]
|
||||||
|
if "agent_id" in event.data:
|
||||||
|
return event.data["agent_id"]
|
||||||
|
# Fall back to source
|
||||||
|
return event.source
|
||||||
|
|
||||||
|
|
||||||
|
def _is_tracked_agent(actor: str) -> bool:
    """Check if an actor is a tracked agent (case-insensitive)."""
    normalized = actor.lower()
    return normalized in TRACKED_AGENTS
|
|
||||||
|
def _aggregate_metrics(events: list[Event]) -> dict[str, AgentMetrics]:
    """Aggregate metrics from events grouped by agent.

    Args:
        events: List of events to process

    Returns:
        Dict mapping agent_id -> AgentMetrics
    """
    metrics_by_agent: dict[str, AgentMetrics] = {}

    for event in events:
        actor = _extract_actor_from_event(event)

        # Skip non-agent events unless they explicitly have an agent_id
        if not _is_tracked_agent(actor) and "agent_id" not in event.data:
            continue

        if actor not in metrics_by_agent:
            metrics_by_agent[actor] = AgentMetrics(agent_id=actor)

        metrics = metrics_by_agent[actor]

        # Process based on event type
        event_type = event.type

        if event_type == "gitea.push":
            # Default to 1 when the payload carries no commit count.
            metrics.commits += event.data.get("num_commits", 1)

        elif event_type == "gitea.issue.opened":
            # Falsy/missing issue numbers (including 0) are ignored.
            issue_num = event.data.get("issue_number", 0)
            if issue_num:
                metrics.issues_touched.add(issue_num)

        elif event_type == "gitea.issue.comment":
            metrics.comments += 1
            issue_num = event.data.get("issue_number", 0)
            if issue_num:
                metrics.issues_touched.add(issue_num)

        elif event_type == "gitea.pull_request":
            pr_num = event.data.get("pr_number", 0)
            action = event.data.get("action", "")
            merged = event.data.get("merged", False)

            if pr_num:
                if action == "opened":
                    metrics.prs_opened.add(pr_num)
                elif action == "closed" and merged:
                    metrics.prs_merged.add(pr_num)
                # Also count as touched issue for tracking
                # NOTE(review): any PR action marks the PR number as a touched
                # issue; PR and issue numbers are assumed to share one
                # numbering space — confirm against the Gitea webhook payloads.
                metrics.issues_touched.add(pr_num)

        elif event_type == "agent.task.completed":
            # Extract test files from task data
            affected = event.data.get("tests_affected", [])
            for test in affected:
                metrics.tests_affected.add(test)

            # Token rewards from task completion
            reward = event.data.get("token_reward", 0)
            if reward:
                metrics.tokens_earned += reward

        elif event_type == "test.execution":
            # Track test files that were executed
            test_files = event.data.get("test_files", [])
            for test in test_files:
                metrics.tests_affected.add(test)

    return metrics_by_agent
||||||
|
|
||||||
|
def _query_token_transactions(agent_id: str, start: datetime, end: datetime) -> tuple[int, int]:
    """Query the lightning ledger for token transactions.

    Args:
        agent_id: The agent to query for
        start: Period start
        end: Period end

    Returns:
        Tuple of (tokens_earned, tokens_spent); (0, 0) when the ledger is
        unavailable or any error occurs
    """
    try:
        # Imported lazily so the scorecard service still works when the
        # lightning package is absent.
        from lightning.ledger import get_transactions

        # NOTE(review): capped at 1000 transactions and not filtered
        # server-side by agent or time — older activity may be missed.
        transactions = get_transactions(limit=1000)

        earned = 0
        spent = 0

        for tx in transactions:
            # Filter by agent if specified
            # NOTE(review): transactions with a falsy agent_id pass this
            # filter and get attributed to every agent — confirm intended.
            if tx.agent_id and tx.agent_id != agent_id:
                continue

            # Filter by timestamp
            try:
                # Normalize a trailing "Z" to an ISO-8601 offset.
                tx_time = datetime.fromisoformat(tx.created_at.replace("Z", "+00:00"))
                if not (start <= tx_time < end):
                    continue
            except (ValueError, AttributeError):
                continue

            if tx.tx_type.value == "incoming":
                earned += tx.amount_sats
            else:
                spent += tx.amount_sats

        return earned, spent

    except Exception as exc:
        # Best-effort: any ledger failure is treated as "no token data".
        logger.debug("Failed to query token transactions: %s", exc)
        return 0, 0
||||||
|
|
||||||
|
def _generate_narrative_bullets(metrics: AgentMetrics, period_type: PeriodType) -> list[str]:
|
||||||
|
"""Generate narrative summary bullets for a scorecard.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
metrics: The agent's metrics
|
||||||
|
period_type: daily or weekly
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of narrative bullet points
|
||||||
|
"""
|
||||||
|
bullets: list[str] = []
|
||||||
|
period_label = "day" if period_type == PeriodType.daily else "week"
|
||||||
|
|
||||||
|
# Activity summary
|
||||||
|
activities = []
|
||||||
|
if metrics.commits:
|
||||||
|
activities.append(f"{metrics.commits} commit{'s' if metrics.commits != 1 else ''}")
|
||||||
|
if len(metrics.prs_opened):
|
||||||
|
activities.append(
|
||||||
|
f"{len(metrics.prs_opened)} PR{'s' if len(metrics.prs_opened) != 1 else ''} opened"
|
||||||
|
)
|
||||||
|
if len(metrics.prs_merged):
|
||||||
|
activities.append(
|
||||||
|
f"{len(metrics.prs_merged)} PR{'s' if len(metrics.prs_merged) != 1 else ''} merged"
|
||||||
|
)
|
||||||
|
if len(metrics.issues_touched):
|
||||||
|
activities.append(
|
||||||
|
f"{len(metrics.issues_touched)} issue{'s' if len(metrics.issues_touched) != 1 else ''} touched"
|
||||||
|
)
|
||||||
|
if metrics.comments:
|
||||||
|
activities.append(f"{metrics.comments} comment{'s' if metrics.comments != 1 else ''}")
|
||||||
|
|
||||||
|
if activities:
|
||||||
|
bullets.append(f"Active across {', '.join(activities)} this {period_label}.")
|
||||||
|
|
||||||
|
# Test activity
|
||||||
|
if len(metrics.tests_affected):
|
||||||
|
bullets.append(
|
||||||
|
f"Affected {len(metrics.tests_affected)} test file{'s' if len(metrics.tests_affected) != 1 else ''}."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Token summary
|
||||||
|
net_tokens = metrics.tokens_earned - metrics.tokens_spent
|
||||||
|
if metrics.tokens_earned or metrics.tokens_spent:
|
||||||
|
if net_tokens > 0:
|
||||||
|
bullets.append(
|
||||||
|
f"Net earned {net_tokens} tokens ({metrics.tokens_earned} earned, {metrics.tokens_spent} spent)."
|
||||||
|
)
|
||||||
|
elif net_tokens < 0:
|
||||||
|
bullets.append(
|
||||||
|
f"Net spent {abs(net_tokens)} tokens ({metrics.tokens_earned} earned, {metrics.tokens_spent} spent)."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
bullets.append(
|
||||||
|
f"Balanced token flow ({metrics.tokens_earned} earned, {metrics.tokens_spent} spent)."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Handle empty case
|
||||||
|
if not bullets:
|
||||||
|
bullets.append(f"No recorded activity this {period_label}.")
|
||||||
|
|
||||||
|
return bullets
|
||||||
|
|
||||||
|
|
||||||
|
def _detect_patterns(metrics: AgentMetrics) -> list[str]:
|
||||||
|
"""Detect interesting patterns in agent behavior.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
metrics: The agent's metrics
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of pattern descriptions
|
||||||
|
"""
|
||||||
|
patterns: list[str] = []
|
||||||
|
|
||||||
|
pr_opened = len(metrics.prs_opened)
|
||||||
|
merge_rate = metrics.pr_merge_rate
|
||||||
|
|
||||||
|
# Merge rate patterns
|
||||||
|
if pr_opened >= 3:
|
||||||
|
if merge_rate >= 0.8:
|
||||||
|
patterns.append("High merge rate with few failures — code quality focus.")
|
||||||
|
elif merge_rate <= 0.3:
|
||||||
|
patterns.append("Lots of noisy PRs, low merge rate — may need review support.")
|
||||||
|
|
||||||
|
# Activity patterns
|
||||||
|
if metrics.commits > 10 and pr_opened == 0:
|
||||||
|
patterns.append("High commit volume without PRs — working directly on main?")
|
||||||
|
|
||||||
|
if len(metrics.issues_touched) > 5 and metrics.comments == 0:
|
||||||
|
patterns.append("Touching many issues but low comment volume — silent worker.")
|
||||||
|
|
||||||
|
if metrics.comments > len(metrics.issues_touched) * 2:
|
||||||
|
patterns.append("Highly communicative — lots of discussion relative to work items.")
|
||||||
|
|
||||||
|
# Token patterns
|
||||||
|
net_tokens = metrics.tokens_earned - metrics.tokens_spent
|
||||||
|
if net_tokens > 100:
|
||||||
|
patterns.append("Strong token accumulation — high value delivery.")
|
||||||
|
elif net_tokens < -50:
|
||||||
|
patterns.append("High token spend — may be in experimentation phase.")
|
||||||
|
|
||||||
|
return patterns
|
||||||
|
|
||||||
|
|
||||||
|
def generate_scorecard(
    agent_id: str,
    period_type: PeriodType = PeriodType.daily,
    reference_date: datetime | None = None,
) -> ScorecardSummary | None:
    """Generate a scorecard for a single agent.

    Args:
        agent_id: The agent to generate scorecard for
        period_type: daily or weekly
        reference_date: The date to calculate from (defaults to now)

    Returns:
        ScorecardSummary for the agent. NOTE(review): despite the ``| None``
        annotation, this implementation always returns a summary — agents
        without activity get a scorecard with empty metrics.
    """
    start, end = _get_period_bounds(period_type, reference_date)

    # Collect events
    events = _collect_events_for_period(start, end, agent_id)

    # Aggregate metrics
    all_metrics = _aggregate_metrics(events)

    # Get metrics for this specific agent
    if agent_id not in all_metrics:
        # Create empty metrics - still generate a scorecard
        metrics = AgentMetrics(agent_id=agent_id)
    else:
        metrics = all_metrics[agent_id]

    # Augment with token data from ledger
    tokens_earned, tokens_spent = _query_token_transactions(agent_id, start, end)
    # max() keeps the larger of event-derived and ledger-derived totals,
    # avoiding double counting when both sources report the same tokens.
    metrics.tokens_earned = max(metrics.tokens_earned, tokens_earned)
    metrics.tokens_spent = max(metrics.tokens_spent, tokens_spent)

    # Generate narrative and patterns
    narrative = _generate_narrative_bullets(metrics, period_type)
    patterns = _detect_patterns(metrics)

    return ScorecardSummary(
        agent_id=agent_id,
        period_type=period_type,
        period_start=start,
        period_end=end,
        metrics=metrics,
        narrative_bullets=narrative,
        patterns=patterns,
    )
|
||||||
|
|
||||||
|
def generate_all_scorecards(
    period_type: PeriodType = PeriodType.daily,
    reference_date: datetime | None = None,
) -> list[ScorecardSummary]:
    """Generate scorecards for all tracked agents.

    Args:
        period_type: daily or weekly
        reference_date: The date to calculate from (defaults to now)

    Returns:
        List of ScorecardSummary for all agents with activity
    """
    start, end = _get_period_bounds(period_type, reference_date)

    # Aggregate metrics for every agent that produced events in the period.
    per_agent = _aggregate_metrics(_collect_events_for_period(start, end))

    # Tracked agents always appear, even with zero activity.
    for tracked in TRACKED_AGENTS:
        per_agent.setdefault(tracked, AgentMetrics(agent_id=tracked))

    results: list[ScorecardSummary] = []

    # Iterate in agent_id order so the output is deterministically sorted.
    for agent, agent_metrics in sorted(per_agent.items()):
        # Augment with ledger data; take the larger of the two sources.
        earned, spent = _query_token_transactions(agent, start, end)
        agent_metrics.tokens_earned = max(agent_metrics.tokens_earned, earned)
        agent_metrics.tokens_spent = max(agent_metrics.tokens_spent, spent)

        results.append(
            ScorecardSummary(
                agent_id=agent,
                period_type=period_type,
                period_start=start,
                period_end=end,
                metrics=agent_metrics,
                narrative_bullets=_generate_narrative_bullets(agent_metrics, period_type),
                patterns=_detect_patterns(agent_metrics),
            )
        )

    return results
|
def get_tracked_agents() -> list[str]:
    """Return the tracked agent IDs in alphabetical order."""
    agents = list(TRACKED_AGENTS)
    agents.sort()
    return agents
|
||||||
@@ -51,6 +51,7 @@
|
|||||||
<a href="/thinking" class="mc-test-link mc-link-thinking">THINKING</a>
|
<a href="/thinking" class="mc-test-link mc-link-thinking">THINKING</a>
|
||||||
<a href="/swarm/mission-control" class="mc-test-link">MISSION CTRL</a>
|
<a href="/swarm/mission-control" class="mc-test-link">MISSION CTRL</a>
|
||||||
<a href="/swarm/live" class="mc-test-link">SWARM</a>
|
<a href="/swarm/live" class="mc-test-link">SWARM</a>
|
||||||
|
<a href="/scorecards" class="mc-test-link">SCORECARDS</a>
|
||||||
<a href="/bugs" class="mc-test-link mc-link-bugs">BUGS</a>
|
<a href="/bugs" class="mc-test-link mc-link-bugs">BUGS</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -123,6 +124,7 @@
|
|||||||
<a href="/thinking" class="mc-mobile-link">THINKING</a>
|
<a href="/thinking" class="mc-mobile-link">THINKING</a>
|
||||||
<a href="/swarm/mission-control" class="mc-mobile-link">MISSION CONTROL</a>
|
<a href="/swarm/mission-control" class="mc-mobile-link">MISSION CONTROL</a>
|
||||||
<a href="/swarm/live" class="mc-mobile-link">SWARM</a>
|
<a href="/swarm/live" class="mc-mobile-link">SWARM</a>
|
||||||
|
<a href="/scorecards" class="mc-mobile-link">SCORECARDS</a>
|
||||||
<a href="/bugs" class="mc-mobile-link">BUGS</a>
|
<a href="/bugs" class="mc-mobile-link">BUGS</a>
|
||||||
<div class="mc-mobile-section-label">INTELLIGENCE</div>
|
<div class="mc-mobile-section-label">INTELLIGENCE</div>
|
||||||
<a href="/spark/ui" class="mc-mobile-link">SPARK</a>
|
<a href="/spark/ui" class="mc-mobile-link">SPARK</a>
|
||||||
|
|||||||
113
src/dashboard/templates/scorecards.html
Normal file
113
src/dashboard/templates/scorecards.html
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Agent Scorecards - Timmy Time{% endblock %}
|
||||||
|
|
||||||
|
{% block extra_styles %}{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<div class="container-fluid py-4">
|
||||||
|
<!-- Header -->
|
||||||
|
<div class="d-flex justify-content-between align-items-center mb-4">
|
||||||
|
<div>
|
||||||
|
<h1 class="h3 mb-0">AGENT SCORECARDS</h1>
|
||||||
|
<p class="text-muted small mb-0">Track agent performance across issues, PRs, tests, and tokens</p>
|
||||||
|
</div>
|
||||||
|
<div class="d-flex gap-2">
|
||||||
|
<select id="period-select" class="form-select form-select-sm" style="width: auto;">
|
||||||
|
<option value="daily" selected>Daily</option>
|
||||||
|
<option value="weekly">Weekly</option>
|
||||||
|
</select>
|
||||||
|
<button class="btn btn-sm btn-primary" onclick="refreshScorecards()">
|
||||||
|
<span>Refresh</span>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Scorecards Grid -->
|
||||||
|
<div id="scorecards-container"
|
||||||
|
hx-get="/scorecards/all/panels?period=daily"
|
||||||
|
hx-trigger="load"
|
||||||
|
hx-swap="innerHTML">
|
||||||
|
<div class="text-center py-5">
|
||||||
|
<div class="spinner-border text-secondary" role="status">
|
||||||
|
<span class="visually-hidden">Loading...</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-muted mt-2">Loading scorecards...</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- API Reference -->
|
||||||
|
<div class="mt-5 pt-4 border-top">
|
||||||
|
<h5 class="text-muted">API Reference</h5>
|
||||||
|
<div class="row g-3">
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-body">
|
||||||
|
<h6 class="card-title">List Tracked Agents</h6>
|
||||||
|
<code>GET /scorecards/api/agents</code>
|
||||||
|
<p class="small text-muted mt-2">Returns all tracked agent IDs</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-body">
|
||||||
|
<h6 class="card-title">Get All Scorecards</h6>
|
||||||
|
<code>GET /scorecards/api?period=daily|weekly</code>
|
||||||
|
<p class="small text-muted mt-2">Returns scorecards for all agents</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-body">
|
||||||
|
<h6 class="card-title">Get Agent Scorecard</h6>
|
||||||
|
<code>GET /scorecards/api/{agent_id}?period=daily|weekly</code>
|
||||||
|
<p class="small text-muted mt-2">Returns scorecard for a specific agent</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="col-md-6">
|
||||||
|
<div class="card mc-panel">
|
||||||
|
<div class="card-body">
|
||||||
|
<h6 class="card-title">HTML Panel (HTMX)</h6>
|
||||||
|
<code>GET /scorecards/panel/{agent_id}?period=daily|weekly</code>
|
||||||
|
<p class="small text-muted mt-2">Returns HTML panel for embedding</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Period selector change handler
|
||||||
|
document.getElementById('period-select').addEventListener('change', function() {
|
||||||
|
refreshScorecards();
|
||||||
|
});
|
||||||
|
|
||||||
|
function refreshScorecards() {
|
||||||
|
var period = document.getElementById('period-select').value;
|
||||||
|
var container = document.getElementById('scorecards-container');
|
||||||
|
|
||||||
|
// Show loading state
|
||||||
|
container.innerHTML = `
|
||||||
|
<div class="text-center py-5">
|
||||||
|
<div class="spinner-border text-secondary" role="status">
|
||||||
|
<span class="visually-hidden">Loading...</span>
|
||||||
|
</div>
|
||||||
|
<p class="text-muted mt-2">Loading scorecards...</p>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
|
||||||
|
// Trigger HTMX request
|
||||||
|
htmx.ajax('GET', '/scorecards/all/panels?period=' + period, {
|
||||||
|
target: '#scorecards-container',
|
||||||
|
swap: 'innerHTML'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Auto-refresh every 5 minutes
|
||||||
|
setInterval(refreshScorecards, 300000);
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
680
tests/dashboard/test_scorecards.py
Normal file
680
tests/dashboard/test_scorecards.py
Normal file
@@ -0,0 +1,680 @@
|
|||||||
|
"""Tests for agent scorecard functionality."""
|
||||||
|
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from dashboard.services.scorecard_service import (
|
||||||
|
AgentMetrics,
|
||||||
|
PeriodType,
|
||||||
|
ScorecardSummary,
|
||||||
|
_aggregate_metrics,
|
||||||
|
_detect_patterns,
|
||||||
|
_extract_actor_from_event,
|
||||||
|
_generate_narrative_bullets,
|
||||||
|
_get_period_bounds,
|
||||||
|
_is_tracked_agent,
|
||||||
|
_query_token_transactions,
|
||||||
|
generate_all_scorecards,
|
||||||
|
generate_scorecard,
|
||||||
|
get_tracked_agents,
|
||||||
|
)
|
||||||
|
from infrastructure.events.bus import Event
|
||||||
|
|
||||||
|
|
||||||
|
class TestPeriodBounds:
    """Test period boundary calculations."""

    def test_daily_period_bounds(self):
        """Test daily period returns correct 24-hour window."""
        ref = datetime(2026, 3, 21, 12, 30, 45, tzinfo=UTC)
        window_start, window_end = _get_period_bounds(PeriodType.daily, ref)

        assert window_end - window_start == timedelta(days=1)
        assert window_start == datetime(2026, 3, 20, 0, 0, 0, tzinfo=UTC)
        assert window_end == datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC)

    def test_weekly_period_bounds(self):
        """Test weekly period returns correct 7-day window."""
        ref = datetime(2026, 3, 21, 12, 30, 45, tzinfo=UTC)
        window_start, window_end = _get_period_bounds(PeriodType.weekly, ref)

        assert window_end - window_start == timedelta(days=7)
        assert window_start == datetime(2026, 3, 14, 0, 0, 0, tzinfo=UTC)
        assert window_end == datetime(2026, 3, 21, 0, 0, 0, tzinfo=UTC)

    def test_default_reference_date(self):
        """Test default reference date uses current time."""
        window_start, window_end = _get_period_bounds(PeriodType.daily)
        now = datetime.now(UTC)

        # End should be midnight at the start of the current day.
        midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
        assert window_end == midnight
        # Start should be exactly 24 hours before the end.
        assert window_end - window_start == timedelta(days=1)
|
class TestTrackedAgents:
    """Test agent tracking functions."""

    def test_get_tracked_agents(self):
        """Test get_tracked_agents returns sorted list."""
        agents = get_tracked_agents()

        assert isinstance(agents, list)
        assert agents == sorted(agents)
        for expected in ("kimi", "claude", "gemini", "hermes", "manus"):
            assert expected in agents

    def test_is_tracked_agent_true(self):
        """Test _is_tracked_agent returns True for tracked agents."""
        # "KIMI" exercises case-insensitive matching.
        for known in ("kimi", "KIMI", "claude", "hermes"):
            assert _is_tracked_agent(known) is True

    def test_is_tracked_agent_false(self):
        """Test _is_tracked_agent returns False for untracked agents."""
        for unknown in ("unknown", "rockachopa", ""):
            assert _is_tracked_agent(unknown) is False
|
class TestExtractActor:
    """Test actor extraction from events."""

    def test_extract_from_actor_field(self):
        """Test extraction from data.actor field."""
        evt = Event(type="test", source="system", data={"actor": "kimi"})
        assert _extract_actor_from_event(evt) == "kimi"

    def test_extract_from_agent_id_field(self):
        """Test extraction from data.agent_id field."""
        evt = Event(type="test", source="system", data={"agent_id": "claude"})
        assert _extract_actor_from_event(evt) == "claude"

    def test_extract_from_source_fallback(self):
        """Test fallback to event.source."""
        evt = Event(type="test", source="gemini", data={})
        assert _extract_actor_from_event(evt) == "gemini"

    def test_actor_priority_over_agent_id(self):
        """Test actor field takes priority over agent_id."""
        evt = Event(
            type="test", source="system", data={"actor": "kimi", "agent_id": "claude"}
        )
        assert _extract_actor_from_event(evt) == "kimi"
|
class TestAggregateMetrics:
    """Test metrics aggregation from events."""

    @staticmethod
    def _gitea(event_type, **payload):
        """Build a gitea-sourced event carrying *payload* as its data dict."""
        return Event(type=event_type, source="gitea", data=payload)

    def test_empty_events(self):
        """Test aggregation with no events returns empty dict."""
        assert _aggregate_metrics([]) == {}

    def test_push_event_aggregation(self):
        """Test push events aggregate commits correctly."""
        by_agent = _aggregate_metrics([
            self._gitea("gitea.push", actor="kimi", num_commits=3),
            self._gitea("gitea.push", actor="kimi", num_commits=2),
        ])

        assert "kimi" in by_agent
        assert by_agent["kimi"].commits == 5

    def test_issue_opened_aggregation(self):
        """Test issue opened events aggregate correctly."""
        by_agent = _aggregate_metrics([
            self._gitea("gitea.issue.opened", actor="claude", issue_number=100),
            self._gitea("gitea.issue.opened", actor="claude", issue_number=101),
        ])

        assert "claude" in by_agent
        assert len(by_agent["claude"].issues_touched) == 2
        assert 100 in by_agent["claude"].issues_touched
        assert 101 in by_agent["claude"].issues_touched

    def test_comment_aggregation(self):
        """Test comment events aggregate correctly."""
        by_agent = _aggregate_metrics([
            self._gitea("gitea.issue.comment", actor="gemini", issue_number=100),
            self._gitea("gitea.issue.comment", actor="gemini", issue_number=101),
        ])

        assert "gemini" in by_agent
        assert by_agent["gemini"].comments == 2
        # Commenting on an issue also counts as touching it.
        assert len(by_agent["gemini"].issues_touched) == 2

    def test_pr_events_aggregation(self):
        """Test PR open and merge events aggregate correctly."""
        by_agent = _aggregate_metrics([
            self._gitea("gitea.pull_request", actor="kimi", pr_number=50, action="opened"),
            self._gitea(
                "gitea.pull_request", actor="kimi", pr_number=50, action="closed", merged=True
            ),
            self._gitea("gitea.pull_request", actor="kimi", pr_number=51, action="opened"),
        ])

        assert "kimi" in by_agent
        assert len(by_agent["kimi"].prs_opened) == 2
        assert len(by_agent["kimi"].prs_merged) == 1
        assert 50 in by_agent["kimi"].prs_merged

    def test_untracked_agent_filtered(self):
        """Test events from untracked agents are filtered out."""
        by_agent = _aggregate_metrics(
            [self._gitea("gitea.push", actor="rockachopa", num_commits=5)]
        )

        assert "rockachopa" not in by_agent

    def test_task_completion_aggregation(self):
        """Test task completion events aggregate test files."""
        by_agent = _aggregate_metrics([
            self._gitea(
                "agent.task.completed",
                agent_id="kimi",
                tests_affected=["test_foo.py", "test_bar.py"],
                token_reward=10,
            ),
        ])

        assert "kimi" in by_agent
        assert len(by_agent["kimi"].tests_affected) == 2
        assert "test_foo.py" in by_agent["kimi"].tests_affected
        assert by_agent["kimi"].tokens_earned == 10
|
class TestAgentMetrics:
    """Test AgentMetrics class."""

    def test_merge_rate_zero_prs(self):
        """Merge rate is 0 when no PRs were opened."""
        assert AgentMetrics(agent_id="kimi").pr_merge_rate == 0.0

    def test_merge_rate_perfect(self):
        """All opened PRs merged gives a 100% merge rate."""
        all_merged = AgentMetrics(
            agent_id="kimi", prs_opened={1, 2, 3}, prs_merged={1, 2, 3}
        )
        assert all_merged.pr_merge_rate == 1.0

    def test_merge_rate_partial(self):
        """Two of four opened PRs merged gives a 50% merge rate."""
        half_merged = AgentMetrics(
            agent_id="kimi", prs_opened={1, 2, 3, 4}, prs_merged={1, 2}
        )
        assert half_merged.pr_merge_rate == 0.5
|
class TestDetectPatterns:
    """Test pattern detection logic."""

    @staticmethod
    def _assert_detected(metrics, needle):
        """Assert that some detected pattern string contains *needle*."""
        detected = _detect_patterns(metrics)
        assert any(needle in p for p in detected)

    def test_high_merge_rate_pattern(self):
        """Test detection of high merge rate pattern."""
        # 4 of 5 PRs merged: an 80% merge rate.
        self._assert_detected(
            AgentMetrics(agent_id="kimi", prs_opened={1, 2, 3, 4, 5}, prs_merged={1, 2, 3, 4}),
            "High merge rate",
        )

    def test_low_merge_rate_pattern(self):
        """Test detection of low merge rate pattern."""
        # 1 of 5 PRs merged: a 20% merge rate.
        self._assert_detected(
            AgentMetrics(agent_id="kimi", prs_opened={1, 2, 3, 4, 5}, prs_merged={1}),
            "low merge rate",
        )

    def test_high_commits_no_prs_pattern(self):
        """Test detection of direct-to-main commits pattern."""
        self._assert_detected(
            AgentMetrics(agent_id="kimi", commits=15, prs_opened=set()),
            "High commit volume without PRs",
        )

    def test_silent_worker_pattern(self):
        """Test detection of silent worker pattern."""
        self._assert_detected(
            AgentMetrics(agent_id="kimi", issues_touched={1, 2, 3, 4, 5, 6}, comments=0),
            "silent worker",
        )

    def test_communicative_pattern(self):
        """Test detection of highly communicative pattern."""
        # Five comments per touched issue.
        self._assert_detected(
            AgentMetrics(agent_id="kimi", issues_touched={1, 2}, comments=10),
            "Highly communicative",
        )

    def test_token_accumulation_pattern(self):
        """Test detection of token accumulation pattern."""
        self._assert_detected(
            AgentMetrics(agent_id="kimi", tokens_earned=150, tokens_spent=10),
            "Strong token accumulation",
        )

    def test_token_spend_pattern(self):
        """Test detection of high token spend pattern."""
        self._assert_detected(
            AgentMetrics(agent_id="kimi", tokens_earned=10, tokens_spent=100),
            "High token spend",
        )
|
class TestGenerateNarrative:
    """Test narrative bullet generation."""

    @staticmethod
    def _daily_bullets(metrics):
        """Generate daily-period narrative bullets for *metrics*."""
        return _generate_narrative_bullets(metrics, PeriodType.daily)

    def test_empty_metrics_narrative(self):
        """Test narrative for empty metrics mentions no activity."""
        bullets = self._daily_bullets(AgentMetrics(agent_id="kimi"))

        assert len(bullets) == 1
        assert "No recorded activity" in bullets[0]

    def test_activity_summary_narrative(self):
        """Test narrative includes activity summary."""
        bullets = self._daily_bullets(
            AgentMetrics(agent_id="kimi", commits=5, prs_opened={1, 2}, prs_merged={1})
        )

        summary = next((b for b in bullets if "Active across" in b), None)
        assert summary is not None
        for fragment in ("5 commits", "2 PRs opened", "1 PR merged"):
            assert fragment in summary

    def test_tests_affected_narrative(self):
        """Test narrative includes tests affected."""
        bullets = self._daily_bullets(
            AgentMetrics(agent_id="kimi", tests_affected={"test_a.py", "test_b.py"})
        )

        assert any("2 test files" in b for b in bullets)

    def test_tokens_earned_narrative(self):
        """Test narrative includes token earnings."""
        bullets = self._daily_bullets(
            AgentMetrics(agent_id="kimi", tokens_earned=100, tokens_spent=20)
        )

        assert any("Net earned 80 tokens" in b for b in bullets)

    def test_tokens_spent_narrative(self):
        """Test narrative includes token spending."""
        bullets = self._daily_bullets(
            AgentMetrics(agent_id="kimi", tokens_earned=20, tokens_spent=100)
        )

        assert any("Net spent 80 tokens" in b for b in bullets)

    def test_balanced_tokens_narrative(self):
        """Test narrative for balanced token flow."""
        bullets = self._daily_bullets(
            AgentMetrics(agent_id="kimi", tokens_earned=100, tokens_spent=100)
        )

        assert any("Balanced token flow" in b for b in bullets)
|
class TestScorecardSummary:
    """Test ScorecardSummary dataclass."""

    def test_to_dict_structure(self):
        """Test to_dict returns expected structure."""
        agent_metrics = AgentMetrics(
            agent_id="kimi",
            issues_touched={1, 2},
            prs_opened={10, 11},
            prs_merged={10},
            tokens_earned=100,
            tokens_spent=20,
        )
        payload = ScorecardSummary(
            agent_id="kimi",
            period_type=PeriodType.daily,
            period_start=datetime.now(UTC),
            period_end=datetime.now(UTC),
            metrics=agent_metrics,
            narrative_bullets=["Test bullet"],
            patterns=["Test pattern"],
        ).to_dict()

        assert payload["agent_id"] == "kimi"
        assert payload["period_type"] == "daily"
        assert "metrics" in payload

        metric_fields = payload["metrics"]
        assert metric_fields["issues_touched"] == 2
        assert metric_fields["prs_opened"] == 2
        assert metric_fields["prs_merged"] == 1
        assert metric_fields["pr_merge_rate"] == 0.5
        assert metric_fields["tokens_earned"] == 100
        assert metric_fields["token_net"] == 80

        assert payload["narrative_bullets"] == ["Test bullet"]
        assert payload["patterns"] == ["Test pattern"]
|
class TestQueryTokenTransactions:
    """Test token transaction querying."""

    @staticmethod
    def _tx(agent_id, direction, amount, when):
        """Build a mock ledger transaction row."""
        return MagicMock(
            agent_id=agent_id,
            tx_type=MagicMock(value=direction),
            amount_sats=amount,
            created_at=when.isoformat(),
        )

    def test_empty_ledger(self):
        """Test empty ledger returns zero values."""
        with patch("lightning.ledger.get_transactions", return_value=[]):
            earned, spent = _query_token_transactions(
                "kimi", datetime.now(UTC), datetime.now(UTC)
            )

        assert earned == 0
        assert spent == 0

    def test_ledger_with_transactions(self):
        """Test ledger aggregation of transactions."""
        now = datetime.now(UTC)
        ledger = [
            self._tx("kimi", "incoming", 100, now),
            self._tx("kimi", "outgoing", 30, now),
        ]
        with patch("lightning.ledger.get_transactions", return_value=ledger):
            earned, spent = _query_token_transactions(
                "kimi", now - timedelta(hours=1), now + timedelta(hours=1)
            )

        assert earned == 100
        assert spent == 30

    def test_ledger_filters_by_agent(self):
        """Test ledger filters transactions by agent_id."""
        now = datetime.now(UTC)
        ledger = [self._tx("claude", "incoming", 100, now)]
        with patch("lightning.ledger.get_transactions", return_value=ledger):
            earned, _spent = _query_token_transactions(
                "kimi", now - timedelta(hours=1), now + timedelta(hours=1)
            )

        assert earned == 0  # Transaction was for claude, not kimi

    def test_ledger_filters_by_time(self):
        """Test ledger filters transactions by time range."""
        now = datetime.now(UTC)
        ledger = [self._tx("kimi", "incoming", 100, now - timedelta(days=2))]
        with patch("lightning.ledger.get_transactions", return_value=ledger):
            # Query covers only the last couple of hours.
            earned, _spent = _query_token_transactions(
                "kimi", now - timedelta(hours=1), now + timedelta(hours=1)
            )

        assert earned == 0  # Transaction was 2 days ago
|
class TestGenerateScorecard:
    """Test scorecard generation."""

    # Module path of the service under test, for patch targets.
    _SERVICE = "dashboard.services.scorecard_service"

    def test_generate_scorecard_no_activity(self):
        """Test scorecard generation for agent with no activity."""
        with (
            patch(f"{self._SERVICE}._collect_events_for_period", return_value=[]),
            patch(f"{self._SERVICE}._query_token_transactions", return_value=(0, 0)),
        ):
            scorecard = generate_scorecard("kimi", PeriodType.daily)

        assert scorecard is not None
        assert scorecard.agent_id == "kimi"
        assert scorecard.period_type == PeriodType.daily
        assert len(scorecard.narrative_bullets) == 1
        assert "No recorded activity" in scorecard.narrative_bullets[0]

    def test_generate_scorecard_with_activity(self):
        """Test scorecard generation includes activity."""
        push = Event(
            type="gitea.push", source="gitea", data={"actor": "kimi", "num_commits": 5}
        )
        with (
            patch(f"{self._SERVICE}._collect_events_for_period", return_value=[push]),
            patch(f"{self._SERVICE}._query_token_transactions", return_value=(100, 20)),
        ):
            scorecard = generate_scorecard("kimi", PeriodType.daily)

        assert scorecard is not None
        assert scorecard.metrics.commits == 5
        assert scorecard.metrics.tokens_earned == 100
        assert scorecard.metrics.tokens_spent == 20
|
class TestGenerateAllScorecards:
    """Test generating scorecards for all agents."""

    # Module path of the service under test, for patch targets.
    _SERVICE = "dashboard.services.scorecard_service"

    def _generate_with_empty_sources(self):
        """Run generate_all_scorecards with no events and no token activity."""
        with (
            patch(f"{self._SERVICE}._collect_events_for_period", return_value=[]),
            patch(f"{self._SERVICE}._query_token_transactions", return_value=(0, 0)),
        ):
            return generate_all_scorecards(PeriodType.daily)

    def test_generates_for_all_tracked_agents(self):
        """Test all tracked agents get scorecards even with no activity."""
        scorecards = self._generate_with_empty_sources()

        agent_ids = {s.agent_id for s in scorecards}
        assert {"kimi", "claude", "gemini", "hermes", "manus"} <= agent_ids

    def test_scorecards_sorted(self):
        """Test scorecards are sorted by agent_id."""
        scorecards = self._generate_with_empty_sources()

        agent_ids = [s.agent_id for s in scorecards]
        assert agent_ids == sorted(agent_ids)
|
class TestScorecardRoutes:
|
||||||
|
"""Test scorecard API routes."""
|
||||||
|
|
||||||
|
def test_list_agents_endpoint(self, client):
|
||||||
|
"""Test GET /scorecards/api/agents returns tracked agents."""
|
||||||
|
response = client.get("/scorecards/api/agents")
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
assert "agents" in data
|
||||||
|
assert "kimi" in data["agents"]
|
||||||
|
assert "claude" in data["agents"]
|
||||||
|
|
||||||
|
def test_get_scorecard_endpoint(self, client):
|
||||||
|
"""Test GET /scorecards/api/{agent_id} returns scorecard."""
|
||||||
|
with patch("dashboard.routes.scorecards.generate_scorecard") as mock_generate:
|
||||||
|
mock_generate.return_value = ScorecardSummary(
|
||||||
|
agent_id="kimi",
|
||||||
|
period_type=PeriodType.daily,
|
||||||
|
period_start=datetime.now(UTC),
|
||||||
|
period_end=datetime.now(UTC),
|
||||||
|
metrics=AgentMetrics(agent_id="kimi"),
|
||||||
|
narrative_bullets=["Test bullet"],
|
||||||
|
patterns=[],
|
||||||
|
)
|
||||||
|
response = client.get("/scorecards/api/kimi?period=daily")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
assert data["agent_id"] == "kimi"
|
||||||
|
assert data["period_type"] == "daily"
|
||||||
|
|
||||||
|
def test_get_scorecard_invalid_period(self, client):
|
||||||
|
"""Test GET with invalid period returns 400."""
|
||||||
|
response = client.get("/scorecards/api/kimi?period=invalid")
|
||||||
|
assert response.status_code == 400
|
||||||
|
assert "error" in response.json()
|
||||||
|
|
||||||
|
def test_get_all_scorecards_endpoint(self, client):
|
||||||
|
"""Test GET /scorecards/api returns all scorecards."""
|
||||||
|
with patch("dashboard.routes.scorecards.generate_all_scorecards") as mock_generate:
|
||||||
|
mock_generate.return_value = [
|
||||||
|
ScorecardSummary(
|
||||||
|
agent_id="kimi",
|
||||||
|
period_type=PeriodType.daily,
|
||||||
|
period_start=datetime.now(UTC),
|
||||||
|
period_end=datetime.now(UTC),
|
||||||
|
metrics=AgentMetrics(agent_id="kimi"),
|
||||||
|
narrative_bullets=[],
|
||||||
|
patterns=[],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
response = client.get("/scorecards/api?period=daily")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
data = response.json()
|
||||||
|
assert data["period"] == "daily"
|
||||||
|
assert "scorecards" in data
|
||||||
|
assert len(data["scorecards"]) == 1
|
||||||
|
|
||||||
|
def test_scorecards_page_renders(self, client):
|
||||||
|
"""Test GET /scorecards returns HTML page."""
|
||||||
|
response = client.get("/scorecards")
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert "text/html" in response.headers.get("content-type", "")
|
||||||
|
assert "AGENT SCORECARDS" in response.text
|
||||||
|
|
||||||
|
def test_scorecard_panel_renders(self, client):
|
||||||
|
"""Test GET /scorecards/panel/{agent_id} returns HTML."""
|
||||||
|
with patch("dashboard.routes.scorecards.generate_scorecard") as mock_generate:
|
||||||
|
mock_generate.return_value = ScorecardSummary(
|
||||||
|
agent_id="kimi",
|
||||||
|
period_type=PeriodType.daily,
|
||||||
|
period_start=datetime.now(UTC),
|
||||||
|
period_end=datetime.now(UTC),
|
||||||
|
metrics=AgentMetrics(agent_id="kimi", commits=5),
|
||||||
|
narrative_bullets=["Active across 5 commits this day."],
|
||||||
|
patterns=["High activity"],
|
||||||
|
)
|
||||||
|
response = client.get("/scorecards/panel/kimi?period=daily")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert "text/html" in response.headers.get("content-type", "")
|
||||||
|
assert "Kimi" in response.text
|
||||||
|
|
||||||
|
def test_all_panels_renders(self, client):
|
||||||
|
"""Test GET /scorecards/all/panels returns HTML with all panels."""
|
||||||
|
with patch("dashboard.routes.scorecards.generate_all_scorecards") as mock_generate:
|
||||||
|
mock_generate.return_value = [
|
||||||
|
ScorecardSummary(
|
||||||
|
agent_id="kimi",
|
||||||
|
period_type=PeriodType.daily,
|
||||||
|
period_start=datetime.now(UTC),
|
||||||
|
period_end=datetime.now(UTC),
|
||||||
|
metrics=AgentMetrics(agent_id="kimi"),
|
||||||
|
narrative_bullets=[],
|
||||||
|
patterns=[],
|
||||||
|
),
|
||||||
|
]
|
||||||
|
response = client.get("/scorecards/all/panels?period=daily")
|
||||||
|
|
||||||
|
assert response.status_code == 200
|
||||||
|
assert "text/html" in response.headers.get("content-type", "")
|
||||||
@@ -1,343 +0,0 @@
|
|||||||
"""Tests for weekly_narrative.py script."""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
from datetime import UTC, datetime, timedelta
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
# Add timmy_automations to path for imports
|
|
||||||
sys.path.insert(
|
|
||||||
0, str(Path(__file__).resolve().parent.parent.parent / "timmy_automations" / "daily_run")
|
|
||||||
)
|
|
||||||
|
|
||||||
import weekly_narrative as wn
|
|
||||||
|
|
||||||
|
|
||||||
class TestParseTimestamp:
|
|
||||||
"""Test timestamp parsing."""
|
|
||||||
|
|
||||||
def test_parse_iso_with_z(self):
|
|
||||||
"""Parse ISO timestamp with Z suffix."""
|
|
||||||
result = wn.parse_ts("2026-03-21T12:00:00Z")
|
|
||||||
assert result is not None
|
|
||||||
assert result.year == 2026
|
|
||||||
assert result.month == 3
|
|
||||||
assert result.day == 21
|
|
||||||
|
|
||||||
def test_parse_iso_with_offset(self):
|
|
||||||
"""Parse ISO timestamp with timezone offset."""
|
|
||||||
result = wn.parse_ts("2026-03-21T12:00:00+00:00")
|
|
||||||
assert result is not None
|
|
||||||
assert result.year == 2026
|
|
||||||
|
|
||||||
def test_parse_empty_string(self):
|
|
||||||
"""Empty string returns None."""
|
|
||||||
result = wn.parse_ts("")
|
|
||||||
assert result is None
|
|
||||||
|
|
||||||
def test_parse_invalid_string(self):
|
|
||||||
"""Invalid string returns None."""
|
|
||||||
result = wn.parse_ts("not-a-timestamp")
|
|
||||||
assert result is None
|
|
||||||
|
|
||||||
|
|
||||||
class TestCollectCyclesData:
|
|
||||||
"""Test cycle data collection."""
|
|
||||||
|
|
||||||
def test_no_cycles_file(self, tmp_path):
|
|
||||||
"""Handle missing cycles file gracefully."""
|
|
||||||
with patch.object(wn, "REPO_ROOT", tmp_path):
|
|
||||||
since = datetime.now(UTC) - timedelta(days=7)
|
|
||||||
result = wn.collect_cycles_data(since)
|
|
||||||
assert result["total"] == 0
|
|
||||||
assert result["successes"] == 0
|
|
||||||
assert result["failures"] == 0
|
|
||||||
|
|
||||||
def test_collect_recent_cycles(self, tmp_path):
|
|
||||||
"""Collect cycles within lookback period."""
|
|
||||||
retro_dir = tmp_path / ".loop" / "retro"
|
|
||||||
retro_dir.mkdir(parents=True)
|
|
||||||
|
|
||||||
now = datetime.now(UTC)
|
|
||||||
cycles = [
|
|
||||||
{"timestamp": now.isoformat(), "success": True, "cycle": 1},
|
|
||||||
{"timestamp": now.isoformat(), "success": False, "cycle": 2},
|
|
||||||
{"timestamp": (now - timedelta(days=10)).isoformat(), "success": True, "cycle": 3},
|
|
||||||
]
|
|
||||||
|
|
||||||
with open(retro_dir / "cycles.jsonl", "w") as f:
|
|
||||||
for c in cycles:
|
|
||||||
f.write(json.dumps(c) + "\n")
|
|
||||||
|
|
||||||
with patch.object(wn, "REPO_ROOT", tmp_path):
|
|
||||||
since = now - timedelta(days=7)
|
|
||||||
result = wn.collect_cycles_data(since)
|
|
||||||
assert result["total"] == 2 # Only recent 2
|
|
||||||
assert result["successes"] == 1
|
|
||||||
assert result["failures"] == 1
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractThemes:
|
|
||||||
"""Test theme extraction from issues."""
|
|
||||||
|
|
||||||
def test_extract_layer_labels(self):
|
|
||||||
"""Extract layer labels from issues."""
|
|
||||||
issues = [
|
|
||||||
{"labels": [{"name": "layer:triage"}, {"name": "bug"}]},
|
|
||||||
{"labels": [{"name": "layer:tests"}, {"name": "bug"}]},
|
|
||||||
{"labels": [{"name": "layer:triage"}, {"name": "feature"}]},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.extract_themes(issues)
|
|
||||||
|
|
||||||
assert len(result["layers"]) == 2
|
|
||||||
layer_names = {layer["name"] for layer in result["layers"]}
|
|
||||||
assert "triage" in layer_names
|
|
||||||
assert "tests" in layer_names
|
|
||||||
|
|
||||||
def test_extract_type_labels(self):
|
|
||||||
"""Extract type labels (bug/feature/etc)."""
|
|
||||||
issues = [
|
|
||||||
{"labels": [{"name": "bug"}]},
|
|
||||||
{"labels": [{"name": "feature"}]},
|
|
||||||
{"labels": [{"name": "bug"}]},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.extract_themes(issues)
|
|
||||||
|
|
||||||
type_names = {t_type["name"] for t_type in result["types"]}
|
|
||||||
assert "bug" in type_names
|
|
||||||
assert "feature" in type_names
|
|
||||||
|
|
||||||
def test_empty_issues(self):
|
|
||||||
"""Handle empty issue list."""
|
|
||||||
result = wn.extract_themes([])
|
|
||||||
assert result["layers"] == []
|
|
||||||
assert result["types"] == []
|
|
||||||
assert result["top_labels"] == []
|
|
||||||
|
|
||||||
|
|
||||||
class TestExtractAgentContributions:
|
|
||||||
"""Test agent contribution extraction."""
|
|
||||||
|
|
||||||
def test_extract_assignees(self):
|
|
||||||
"""Extract assignee counts."""
|
|
||||||
issues = [
|
|
||||||
{"assignee": {"login": "kimi"}},
|
|
||||||
{"assignee": {"login": "hermes"}},
|
|
||||||
{"assignee": {"login": "kimi"}},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.extract_agent_contributions(issues, [], [])
|
|
||||||
|
|
||||||
assert len(result["active_assignees"]) == 2
|
|
||||||
assignee_logins = {a["login"] for a in result["active_assignees"]} # noqa: E741
|
|
||||||
assert "kimi" in assignee_logins
|
|
||||||
assert "hermes" in assignee_logins
|
|
||||||
|
|
||||||
def test_extract_pr_authors(self):
|
|
||||||
"""Extract PR author counts."""
|
|
||||||
prs = [
|
|
||||||
{"user": {"login": "kimi"}},
|
|
||||||
{"user": {"login": "claude"}},
|
|
||||||
{"user": {"login": "kimi"}},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.extract_agent_contributions([], prs, [])
|
|
||||||
|
|
||||||
assert len(result["pr_authors"]) == 2
|
|
||||||
|
|
||||||
def test_kimi_mentions_in_cycles(self):
|
|
||||||
"""Count Kimi mentions in cycle notes."""
|
|
||||||
cycles = [
|
|
||||||
{"notes": "Kimi did great work", "reason": ""},
|
|
||||||
{"notes": "", "reason": "Kimi timeout"},
|
|
||||||
{"notes": "All good", "reason": ""},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.extract_agent_contributions([], [], cycles)
|
|
||||||
assert result["kimi_mentioned_cycles"] == 2
|
|
||||||
|
|
||||||
|
|
||||||
class TestAnalyzeTestShifts:
|
|
||||||
"""Test test pattern analysis."""
|
|
||||||
|
|
||||||
def test_no_cycles(self):
|
|
||||||
"""Handle no cycle data."""
|
|
||||||
result = wn.analyze_test_shifts([])
|
|
||||||
assert "note" in result
|
|
||||||
|
|
||||||
def test_test_metrics(self):
|
|
||||||
"""Calculate test metrics from cycles."""
|
|
||||||
cycles = [
|
|
||||||
{"tests_passed": 100, "tests_added": 5},
|
|
||||||
{"tests_passed": 150, "tests_added": 3},
|
|
||||||
]
|
|
||||||
|
|
||||||
result = wn.analyze_test_shifts(cycles)
|
|
||||||
|
|
||||||
assert result["total_tests_passed"] == 250
|
|
||||||
assert result["total_tests_added"] == 8
|
|
||||||
|
|
||||||
|
|
||||||
class TestGenerateVibeSummary:
|
|
||||||
"""Test vibe summary generation."""
|
|
||||||
|
|
||||||
def test_productive_vibe(self):
|
|
||||||
"""High success rate and activity = productive vibe."""
|
|
||||||
cycles_data = {"success_rate": 0.95, "successes": 10, "failures": 1}
|
|
||||||
issues_data = {"closed_count": 5}
|
|
||||||
|
|
||||||
result = wn.generate_vibe_summary(cycles_data, issues_data, {}, {"layers": []}, {}, {}, {})
|
|
||||||
|
|
||||||
assert result["overall"] == "productive"
|
|
||||||
assert "strong week" in result["description"].lower()
|
|
||||||
|
|
||||||
def test_struggling_vibe(self):
|
|
||||||
"""More failures than successes = struggling vibe."""
|
|
||||||
cycles_data = {"success_rate": 0.3, "successes": 3, "failures": 7}
|
|
||||||
issues_data = {"closed_count": 0}
|
|
||||||
|
|
||||||
result = wn.generate_vibe_summary(cycles_data, issues_data, {}, {"layers": []}, {}, {}, {})
|
|
||||||
|
|
||||||
assert result["overall"] == "struggling"
|
|
||||||
|
|
||||||
def test_quiet_vibe(self):
|
|
||||||
"""Low activity = quiet vibe."""
|
|
||||||
cycles_data = {"success_rate": 0.0, "successes": 0, "failures": 0}
|
|
||||||
issues_data = {"closed_count": 0}
|
|
||||||
|
|
||||||
result = wn.generate_vibe_summary(cycles_data, issues_data, {}, {"layers": []}, {}, {}, {})
|
|
||||||
|
|
||||||
assert result["overall"] == "quiet"
|
|
||||||
|
|
||||||
|
|
||||||
class TestGenerateMarkdownSummary:
|
|
||||||
"""Test markdown summary generation."""
|
|
||||||
|
|
||||||
def test_includes_header(self):
|
|
||||||
"""Markdown includes header."""
|
|
||||||
narrative = {
|
|
||||||
"period": {"start": "2026-03-14T00:00:00", "end": "2026-03-21T00:00:00"},
|
|
||||||
"vibe": {"overall": "productive", "description": "Good week"},
|
|
||||||
"activity": {
|
|
||||||
"cycles": {"total": 10, "successes": 9, "failures": 1},
|
|
||||||
"issues": {"closed": 5, "opened": 3},
|
|
||||||
"pull_requests": {"merged": 4, "opened": 2},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
result = wn.generate_markdown_summary(narrative)
|
|
||||||
|
|
||||||
assert "# Weekly Narrative Summary" in result
|
|
||||||
assert "productive" in result.lower()
|
|
||||||
assert "10 total" in result or "10" in result
|
|
||||||
|
|
||||||
def test_includes_focus_areas(self):
|
|
||||||
"""Markdown includes focus areas when present."""
|
|
||||||
narrative = {
|
|
||||||
"period": {"start": "2026-03-14", "end": "2026-03-21"},
|
|
||||||
"vibe": {
|
|
||||||
"overall": "productive",
|
|
||||||
"description": "Good week",
|
|
||||||
"focus_areas": ["triage (5 items)", "tests (3 items)"],
|
|
||||||
},
|
|
||||||
"activity": {
|
|
||||||
"cycles": {"total": 0, "successes": 0, "failures": 0},
|
|
||||||
"issues": {"closed": 0, "opened": 0},
|
|
||||||
"pull_requests": {"merged": 0, "opened": 0},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
result = wn.generate_markdown_summary(narrative)
|
|
||||||
|
|
||||||
assert "Focus Areas" in result
|
|
||||||
assert "triage" in result
|
|
||||||
|
|
||||||
|
|
||||||
class TestConfigLoading:
|
|
||||||
"""Test configuration loading."""
|
|
||||||
|
|
||||||
def test_default_config(self, tmp_path):
|
|
||||||
"""Default config when manifest missing."""
|
|
||||||
with patch.object(wn, "CONFIG_PATH", tmp_path / "nonexistent.json"):
|
|
||||||
config = wn.load_automation_config()
|
|
||||||
assert config["lookback_days"] == 7
|
|
||||||
assert config["enabled"] is True
|
|
||||||
|
|
||||||
def test_environment_override(self, tmp_path):
|
|
||||||
"""Environment variables override config."""
|
|
||||||
with patch.dict("os.environ", {"TIMMY_WEEKLY_NARRATIVE_ENABLED": "false"}):
|
|
||||||
with patch.object(wn, "CONFIG_PATH", tmp_path / "nonexistent.json"):
|
|
||||||
config = wn.load_automation_config()
|
|
||||||
assert config["enabled"] is False
|
|
||||||
|
|
||||||
|
|
||||||
class TestMain:
|
|
||||||
"""Test main function."""
|
|
||||||
|
|
||||||
def test_disabled_exits_cleanly(self, tmp_path):
|
|
||||||
"""When disabled and no --force, exits cleanly."""
|
|
||||||
with patch.object(wn, "REPO_ROOT", tmp_path):
|
|
||||||
with patch.object(wn, "load_automation_config", return_value={"enabled": False}):
|
|
||||||
with patch("sys.argv", ["weekly_narrative"]):
|
|
||||||
result = wn.main()
|
|
||||||
assert result == 0
|
|
||||||
|
|
||||||
def test_force_runs_when_disabled(self, tmp_path):
|
|
||||||
"""--force runs even when disabled."""
|
|
||||||
# Setup minimal structure
|
|
||||||
(tmp_path / ".loop" / "retro").mkdir(parents=True)
|
|
||||||
|
|
||||||
with patch.object(wn, "REPO_ROOT", tmp_path):
|
|
||||||
with patch.object(
|
|
||||||
wn,
|
|
||||||
"load_automation_config",
|
|
||||||
return_value={
|
|
||||||
"enabled": False,
|
|
||||||
"lookback_days": 7,
|
|
||||||
"gitea_api": "http://localhost:3000/api/v1",
|
|
||||||
"repo_slug": "test/repo",
|
|
||||||
"token_file": "~/.hermes/gitea_token",
|
|
||||||
},
|
|
||||||
):
|
|
||||||
with patch.object(wn, "GiteaClient") as mock_client:
|
|
||||||
mock_instance = MagicMock()
|
|
||||||
mock_instance.is_available.return_value = False
|
|
||||||
mock_client.return_value = mock_instance
|
|
||||||
|
|
||||||
with patch("sys.argv", ["weekly_narrative", "--force"]):
|
|
||||||
result = wn.main()
|
|
||||||
# Should complete without error even though Gitea unavailable
|
|
||||||
assert result == 0
|
|
||||||
|
|
||||||
|
|
||||||
class TestGiteaClient:
|
|
||||||
"""Test Gitea API client."""
|
|
||||||
|
|
||||||
def test_is_available_when_unavailable(self):
|
|
||||||
"""is_available returns False when server down."""
|
|
||||||
config = {"gitea_api": "http://localhost:99999", "repo_slug": "test/repo"}
|
|
||||||
client = wn.GiteaClient(config, None)
|
|
||||||
|
|
||||||
# Should return False without raising
|
|
||||||
assert client.is_available() is False
|
|
||||||
|
|
||||||
def test_headers_with_token(self):
|
|
||||||
"""Headers include Authorization when token provided."""
|
|
||||||
config = {"gitea_api": "http://localhost:3000", "repo_slug": "test/repo"}
|
|
||||||
client = wn.GiteaClient(config, "test-token")
|
|
||||||
|
|
||||||
headers = client._headers()
|
|
||||||
assert headers["Authorization"] == "token test-token"
|
|
||||||
|
|
||||||
def test_headers_without_token(self):
|
|
||||||
"""Headers don't include Authorization when no token."""
|
|
||||||
config = {"gitea_api": "http://localhost:3000", "repo_slug": "test/repo"}
|
|
||||||
client = wn.GiteaClient(config, None)
|
|
||||||
|
|
||||||
headers = client._headers()
|
|
||||||
assert "Authorization" not in headers
|
|
||||||
@@ -228,27 +228,6 @@
|
|||||||
"max_items": 5
|
"max_items": 5
|
||||||
},
|
},
|
||||||
"outputs": []
|
"outputs": []
|
||||||
},
|
|
||||||
{
|
|
||||||
"id": "weekly_narrative",
|
|
||||||
"name": "Weekly Narrative Summary",
|
|
||||||
"description": "Generates a human-readable weekly summary of work themes, agent contributions, and token economy shifts",
|
|
||||||
"script": "timmy_automations/daily_run/weekly_narrative.py",
|
|
||||||
"category": "daily_run",
|
|
||||||
"enabled": true,
|
|
||||||
"trigger": "scheduled",
|
|
||||||
"schedule": "weekly",
|
|
||||||
"executable": "python3",
|
|
||||||
"config": {
|
|
||||||
"lookback_days": 7,
|
|
||||||
"output_file": ".loop/weekly_narrative.json",
|
|
||||||
"gitea_api": "http://localhost:3000/api/v1",
|
|
||||||
"repo_slug": "rockachopa/Timmy-time-dashboard"
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
".loop/weekly_narrative.json",
|
|
||||||
".loop/weekly_narrative.md"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,10 +17,6 @@
|
|||||||
"manual": {
|
"manual": {
|
||||||
"description": "Run on-demand only",
|
"description": "Run on-demand only",
|
||||||
"automations": ["agent_workspace", "kimi_bootstrap", "kimi_resume", "backfill_retro"]
|
"automations": ["agent_workspace", "kimi_bootstrap", "kimi_resume", "backfill_retro"]
|
||||||
},
|
|
||||||
"weekly": {
|
|
||||||
"description": "Run once per week (Sundays)",
|
|
||||||
"automations": ["weekly_narrative"]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"triggers": {
|
"triggers": {
|
||||||
|
|||||||
@@ -1,745 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""Weekly narrative summary generator — human-readable loop analysis.
|
|
||||||
|
|
||||||
Analyzes the past week's activity across the development loop to produce
|
|
||||||
a narrative summary of:
|
|
||||||
- What changed (themes, areas of focus)
|
|
||||||
- How agents and Timmy contributed
|
|
||||||
- Any shifts in tests, triage, or token economy
|
|
||||||
|
|
||||||
The output is designed to be skimmable — a quick read that gives context
|
|
||||||
on the week's progress without drowning in metrics.
|
|
||||||
|
|
||||||
Run: python3 timmy_automations/daily_run/weekly_narrative.py [--json]
|
|
||||||
Env: See timmy_automations/config/automations.json for configuration
|
|
||||||
|
|
||||||
Refs: #719
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from collections import Counter
|
|
||||||
from datetime import UTC, datetime, timedelta
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
from urllib.error import HTTPError, URLError
|
|
||||||
from urllib.request import Request, urlopen
|
|
||||||
|
|
||||||
# ── Configuration ─────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
REPO_ROOT = Path(__file__).resolve().parent.parent.parent
|
|
||||||
CONFIG_PATH = Path(__file__).parent.parent / "config" / "automations.json"
|
|
||||||
|
|
||||||
DEFAULT_CONFIG = {
|
|
||||||
"gitea_api": "http://localhost:3000/api/v1",
|
|
||||||
"repo_slug": "rockachopa/Timmy-time-dashboard",
|
|
||||||
"token_file": "~/.hermes/gitea_token",
|
|
||||||
"lookback_days": 7,
|
|
||||||
"output_file": ".loop/weekly_narrative.json",
|
|
||||||
"enabled": True,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# ── Data Loading ───────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def load_automation_config() -> dict:
|
|
||||||
"""Load configuration for weekly_narrative from automations manifest."""
|
|
||||||
config = DEFAULT_CONFIG.copy()
|
|
||||||
if CONFIG_PATH.exists():
|
|
||||||
try:
|
|
||||||
manifest = json.loads(CONFIG_PATH.read_text())
|
|
||||||
for auto in manifest.get("automations", []):
|
|
||||||
if auto.get("id") == "weekly_narrative":
|
|
||||||
config.update(auto.get("config", {}))
|
|
||||||
config["enabled"] = auto.get("enabled", True)
|
|
||||||
break
|
|
||||||
except (json.JSONDecodeError, OSError) as exc:
|
|
||||||
print(f"[weekly_narrative] Warning: Could not load config: {exc}", file=sys.stderr)
|
|
||||||
|
|
||||||
# Environment variable overrides
|
|
||||||
if os.environ.get("TIMMY_GITEA_API"):
|
|
||||||
config["gitea_api"] = os.environ.get("TIMMY_GITEA_API")
|
|
||||||
if os.environ.get("TIMMY_REPO_SLUG"):
|
|
||||||
config["repo_slug"] = os.environ.get("TIMMY_REPO_SLUG")
|
|
||||||
if os.environ.get("TIMMY_GITEA_TOKEN"):
|
|
||||||
config["token"] = os.environ.get("TIMMY_GITEA_TOKEN")
|
|
||||||
if os.environ.get("TIMMY_WEEKLY_NARRATIVE_ENABLED"):
|
|
||||||
config["enabled"] = os.environ.get("TIMMY_WEEKLY_NARRATIVE_ENABLED", "true").lower() == "true"
|
|
||||||
|
|
||||||
return config
|
|
||||||
|
|
||||||
|
|
||||||
def get_token(config: dict) -> str | None:
|
|
||||||
"""Get Gitea token from environment or file."""
|
|
||||||
if "token" in config:
|
|
||||||
return config["token"]
|
|
||||||
|
|
||||||
token_file = Path(config["token_file"]).expanduser()
|
|
||||||
if token_file.exists():
|
|
||||||
return token_file.read_text().strip()
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def load_jsonl(path: Path) -> list[dict]:
|
|
||||||
"""Load a JSONL file, skipping bad lines."""
|
|
||||||
if not path.exists():
|
|
||||||
return []
|
|
||||||
entries = []
|
|
||||||
for line in path.read_text().strip().splitlines():
|
|
||||||
try:
|
|
||||||
entries.append(json.loads(line))
|
|
||||||
except (json.JSONDecodeError, ValueError):
|
|
||||||
continue
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
def parse_ts(ts_str: str) -> datetime | None:
|
|
||||||
"""Parse an ISO timestamp, tolerating missing tz."""
|
|
||||||
if not ts_str:
|
|
||||||
return None
|
|
||||||
try:
|
|
||||||
dt = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
|
||||||
if dt.tzinfo is None:
|
|
||||||
dt = dt.replace(tzinfo=UTC)
|
|
||||||
return dt
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# ── Gitea API Client ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
class GiteaClient:
|
|
||||||
"""Simple Gitea API client with graceful degradation."""
|
|
||||||
|
|
||||||
def __init__(self, config: dict, token: str | None):
|
|
||||||
self.api_base = config["gitea_api"].rstrip("/")
|
|
||||||
self.repo_slug = config["repo_slug"]
|
|
||||||
self.token = token
|
|
||||||
self._available: bool | None = None
|
|
||||||
|
|
||||||
def _headers(self) -> dict:
|
|
||||||
headers = {"Accept": "application/json"}
|
|
||||||
if self.token:
|
|
||||||
headers["Authorization"] = f"token {self.token}"
|
|
||||||
return headers
|
|
||||||
|
|
||||||
def _api_url(self, path: str) -> str:
|
|
||||||
return f"{self.api_base}/repos/{self.repo_slug}/{path}"
|
|
||||||
|
|
||||||
def is_available(self) -> bool:
|
|
||||||
"""Check if Gitea API is reachable."""
|
|
||||||
if self._available is not None:
|
|
||||||
return self._available
|
|
||||||
|
|
||||||
try:
|
|
||||||
req = Request(
|
|
||||||
f"{self.api_base}/version",
|
|
||||||
headers=self._headers(),
|
|
||||||
method="GET",
|
|
||||||
)
|
|
||||||
with urlopen(req, timeout=5) as resp:
|
|
||||||
self._available = resp.status == 200
|
|
||||||
return self._available
|
|
||||||
except (HTTPError, URLError, TimeoutError):
|
|
||||||
self._available = False
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get_paginated(self, path: str, params: dict | None = None) -> list:
|
|
||||||
"""Fetch all pages of a paginated endpoint."""
|
|
||||||
all_items = []
|
|
||||||
page = 1
|
|
||||||
limit = 50
|
|
||||||
|
|
||||||
while True:
|
|
||||||
url = self._api_url(path)
|
|
||||||
query_parts = [f"limit={limit}", f"page={page}"]
|
|
||||||
if params:
|
|
||||||
for key, val in params.items():
|
|
||||||
query_parts.append(f"{key}={val}")
|
|
||||||
url = f"{url}?{'&'.join(query_parts)}"
|
|
||||||
|
|
||||||
req = Request(url, headers=self._headers(), method="GET")
|
|
||||||
with urlopen(req, timeout=15) as resp:
|
|
||||||
batch = json.loads(resp.read())
|
|
||||||
|
|
||||||
if not batch:
|
|
||||||
break
|
|
||||||
|
|
||||||
all_items.extend(batch)
|
|
||||||
if len(batch) < limit:
|
|
||||||
break
|
|
||||||
page += 1
|
|
||||||
|
|
||||||
return all_items
|
|
||||||
|
|
||||||
|
|
||||||
# ── Data Collection ────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def collect_cycles_data(since: datetime) -> dict:
|
|
||||||
"""Load cycle retrospective data from the lookback period."""
|
|
||||||
cycles_file = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
|
||||||
if not cycles_file.exists():
|
|
||||||
return {"cycles": [], "total": 0, "successes": 0, "failures": 0}
|
|
||||||
|
|
||||||
entries = load_jsonl(cycles_file)
|
|
||||||
recent = []
|
|
||||||
for e in entries:
|
|
||||||
ts = parse_ts(e.get("timestamp", ""))
|
|
||||||
if ts and ts >= since:
|
|
||||||
recent.append(e)
|
|
||||||
|
|
||||||
successes = [e for e in recent if e.get("success")]
|
|
||||||
failures = [e for e in recent if not e.get("success")]
|
|
||||||
|
|
||||||
return {
|
|
||||||
"cycles": recent,
|
|
||||||
"total": len(recent),
|
|
||||||
"successes": len(successes),
|
|
||||||
"failures": len(failures),
|
|
||||||
"success_rate": round(len(successes) / len(recent), 2) if recent else 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def collect_issues_data(client: GiteaClient, since: datetime) -> dict:
|
|
||||||
"""Collect issue activity from Gitea."""
|
|
||||||
if not client.is_available():
|
|
||||||
return {"error": "Gitea unavailable", "issues": [], "closed": [], "opened": []}
|
|
||||||
|
|
||||||
try:
|
|
||||||
issues = client.get_paginated("issues", {"state": "all", "sort": "updated", "limit": 100})
|
|
||||||
except (HTTPError, URLError) as exc:
|
|
||||||
return {"error": str(exc), "issues": [], "closed": [], "opened": []}
|
|
||||||
|
|
||||||
touched = []
|
|
||||||
closed = []
|
|
||||||
opened = []
|
|
||||||
|
|
||||||
for issue in issues:
|
|
||||||
updated_at = issue.get("updated_at", "")
|
|
||||||
created_at = issue.get("created_at", "")
|
|
||||||
|
|
||||||
updated = parse_ts(updated_at)
|
|
||||||
created = parse_ts(created_at)
|
|
||||||
|
|
||||||
if updated and updated >= since:
|
|
||||||
touched.append(issue)
|
|
||||||
|
|
||||||
if issue.get("state") == "closed":
|
|
||||||
closed_at = issue.get("closed_at", "")
|
|
||||||
closed_dt = parse_ts(closed_at)
|
|
||||||
if closed_dt and closed_dt >= since:
|
|
||||||
closed.append(issue)
|
|
||||||
elif created and created >= since:
|
|
||||||
opened.append(issue)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"issues": touched,
|
|
||||||
"closed": closed,
|
|
||||||
"opened": opened,
|
|
||||||
"touched_count": len(touched),
|
|
||||||
"closed_count": len(closed),
|
|
||||||
"opened_count": len(opened),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def collect_prs_data(client: GiteaClient, since: datetime) -> dict:
|
|
||||||
"""Collect PR activity from Gitea."""
|
|
||||||
if not client.is_available():
|
|
||||||
return {"error": "Gitea unavailable", "prs": [], "merged": [], "opened": []}
|
|
||||||
|
|
||||||
try:
|
|
||||||
prs = client.get_paginated("pulls", {"state": "all", "sort": "updated", "limit": 100})
|
|
||||||
except (HTTPError, URLError) as exc:
|
|
||||||
return {"error": str(exc), "prs": [], "merged": [], "opened": []}
|
|
||||||
|
|
||||||
touched = []
|
|
||||||
merged = []
|
|
||||||
opened = []
|
|
||||||
|
|
||||||
for pr in prs:
|
|
||||||
updated_at = pr.get("updated_at", "")
|
|
||||||
created_at = pr.get("created_at", "")
|
|
||||||
merged_at = pr.get("merged_at", "")
|
|
||||||
|
|
||||||
updated = parse_ts(updated_at)
|
|
||||||
created = parse_ts(created_at)
|
|
||||||
merged_dt = parse_ts(merged_at) if merged_at else None
|
|
||||||
|
|
||||||
if updated and updated >= since:
|
|
||||||
touched.append(pr)
|
|
||||||
|
|
||||||
if pr.get("merged") and merged_dt and merged_dt >= since:
|
|
||||||
merged.append(pr)
|
|
||||||
elif created and created >= since:
|
|
||||||
opened.append(pr)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"prs": touched,
|
|
||||||
"merged": merged,
|
|
||||||
"opened": opened,
|
|
||||||
"touched_count": len(touched),
|
|
||||||
"merged_count": len(merged),
|
|
||||||
"opened_count": len(opened),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def collect_triage_data(since: datetime) -> dict:
|
|
||||||
"""Load triage and introspection data."""
|
|
||||||
triage_file = REPO_ROOT / ".loop" / "retro" / "triage.jsonl"
|
|
||||||
insights_file = REPO_ROOT / ".loop" / "retro" / "insights.json"
|
|
||||||
|
|
||||||
triage_entries = load_jsonl(triage_file)
|
|
||||||
recent_triage = [
|
|
||||||
e for e in triage_entries
|
|
||||||
if parse_ts(e.get("timestamp", "")) and parse_ts(e.get("timestamp", "")) >= since
|
|
||||||
]
|
|
||||||
|
|
||||||
insights = {}
|
|
||||||
if insights_file.exists():
|
|
||||||
try:
|
|
||||||
insights = json.loads(insights_file.read_text())
|
|
||||||
except (json.JSONDecodeError, OSError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
return {
|
|
||||||
"triage_runs": len(recent_triage),
|
|
||||||
"triage_entries": recent_triage,
|
|
||||||
"latest_insights": insights,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def collect_token_data(since: datetime) -> dict:
|
|
||||||
"""Load token economy data from the lightning ledger."""
|
|
||||||
# The ledger is in-memory but we can look for any persisted data
|
|
||||||
# For now, return placeholder that will be filled by the ledger module
|
|
||||||
return {
|
|
||||||
"note": "Token economy data is ephemeral — check dashboard for live metrics",
|
|
||||||
"balance_sats": 0, # Placeholder
|
|
||||||
"transactions_week": 0,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# ── Analysis Functions ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def extract_themes(issues: list[dict]) -> dict:
    """Extract label-based themes from a list of issues.

    Args:
        issues: Issue dicts; each may carry a "labels" list of
            {"name": ...} dicts.

    Returns:
        Dict with three keyed lists of {"name", "count"} entries:
        "top_labels" (up to 10, excluding "layer:"/"size:" prefixes),
        "layers" (label names with the "layer:" prefix stripped), and
        "types" (bug/feature/refactor/docs/test/chore labels).
        Note: the original annotation said ``list[dict]``, but the function
        has always returned a dict — the annotation is corrected here.
    """
    label_counts = Counter()
    layer_counts = Counter()
    type_counts = Counter()

    for issue in issues:
        for label in issue.get("labels", []):
            name = label.get("name", "")
            label_counts[name] += 1

            if name.startswith("layer:"):
                # removeprefix strips only the leading marker, unlike
                # str.replace which would touch every occurrence.
                layer_counts[name.removeprefix("layer:")] += 1
            if name in ("bug", "feature", "refactor", "docs", "test", "chore"):
                type_counts[name] += 1

    # Top themes (labels excluding layer/size prefixes)
    themes = [
        {"name": name, "count": count}
        for name, count in label_counts.most_common(10)
        if not name.startswith(("layer:", "size:"))
    ]

    # Layers
    layers = [
        {"name": name, "count": count}
        for name, count in layer_counts.most_common()
    ]

    # Types
    types = [
        {"name": name, "count": count}
        for name, count in type_counts.most_common()
    ]

    return {
        "top_labels": themes,
        "layers": layers,
        "types": types,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def extract_agent_contributions(issues: list[dict], prs: list[dict], cycles: list[dict]) -> dict:
    """Extract agent contribution patterns.

    Tallies issues per assignee, PRs per author, and counts cycles whose
    notes or reason mention Kimi.
    """

    def mentions_kimi(cycle: dict) -> bool:
        # Kimi can surface in either the free-text notes or the reason field.
        return (
            "kimi" in cycle.get("notes", "").lower()
            or "kimi" in cycle.get("reason", "").lower()
        )

    # Issues per assignee login; only truthy, dict-shaped assignees count.
    assignee_tally = Counter(
        issue["assignee"].get("login", "unknown")
        for issue in issues
        if issue.get("assignee") and isinstance(issue["assignee"], dict)
    )

    # PRs per author login, with the same shape guard.
    author_tally = Counter(
        pr["user"].get("login", "unknown")
        for pr in prs
        if pr.get("user") and isinstance(pr["user"], dict)
    )

    return {
        "active_assignees": [
            {"login": login, "issues_count": count}
            for login, count in assignee_tally.most_common()
        ],
        "pr_authors": [
            {"login": login, "prs_count": count}
            for login, count in author_tally.most_common()
        ],
        "kimi_mentioned_cycles": sum(1 for c in cycles if mentions_kimi(c)),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_test_shifts(cycles: list[dict]) -> dict:
    """Analyze shifts in test patterns across development cycles.

    Args:
        cycles: Cycle dicts; "tests_passed"/"tests_added" counters and
            "type"/"notes" fields are consulted when present.

    Returns:
        Aggregate test stats, or a {"note": ...} placeholder when there is
        no cycle data at all.
    """
    if not cycles:
        return {"note": "No cycle data available"}

    total_tests_passed = sum(c.get("tests_passed", 0) for c in cycles)
    total_tests_added = sum(c.get("tests_added", 0) for c in cycles)
    # cycles is guaranteed non-empty past the guard above, so the division
    # is safe (the original re-checked `if cycles` redundantly here).
    avg_tests_per_cycle = round(total_tests_passed / len(cycles), 1)

    # Cycles explicitly typed "test" or whose notes mention tests.
    test_focused = [
        c for c in cycles
        if c.get("type") == "test" or "test" in c.get("notes", "").lower()
    ]

    return {
        "total_tests_passed": total_tests_passed,
        "total_tests_added": total_tests_added,
        "avg_tests_per_cycle": avg_tests_per_cycle,
        "test_focused_cycles": len(test_focused),
    }
|
|
||||||
|
|
||||||
|
|
||||||
def analyze_triage_shifts(triage_data: dict) -> dict:
    """Analyze shifts in triage patterns.

    Summarises how often triage ran, whether insights were produced, and
    how many high-severity recommendations came out of introspection.
    """
    insights = triage_data.get("latest_insights", {})
    recs = insights.get("recommendations", [])

    high_severity = [rec for rec in recs if rec.get("severity") == "high"]

    return {
        "triage_runs": triage_data.get("triage_runs", 0),
        "insights_generated": insights.get("generated_at") is not None,
        "high_priority_recommendations": len(high_severity),
        # At most the three most recent recommendations are surfaced.
        "recent_recommendations": recs[:3] if recs else [],
    }
|
|
||||||
|
|
||||||
|
|
||||||
def generate_vibe_summary(
    cycles_data: dict,
    issues_data: dict,
    prs_data: dict,
    themes: dict,
    agent_contrib: dict,
    test_shifts: dict,
    triage_shifts: dict,
) -> dict:
    """Generate the human-readable 'vibe' summary."""
    rate = cycles_data.get("success_rate", 0)
    n_failures = cycles_data.get("failures", 0)
    n_closed = issues_data.get("closed_count", 0)
    n_merged = prs_data.get("merged_count", 0)

    # Pick the overall mood from delivery volume and success-rate signals.
    if rate >= 0.9 and n_closed > 0:
        mood = "productive"
        blurb = "A strong week with solid delivery and healthy success rates."
    elif rate >= 0.7:
        mood = "steady"
        blurb = "Steady progress with some bumps. Things are moving forward."
    elif n_failures > cycles_data.get("successes", 0):
        mood = "struggling"
        blurb = "A challenging week with more failures than successes. Time to regroup."
    else:
        mood = "quiet"
        blurb = "A lighter week with limited activity."

    # Up to three layer themes become focus areas.
    focus = [
        f"{layer['name']} ({layer['count']} items)"
        for layer in themes.get("layers", [])[:3]
    ]

    # Call out the most active assignee, if there is one.
    assignees = agent_contrib.get("active_assignees", [])
    if assignees:
        lead = assignees[0]
        agent_line = f"{lead['login']} led with {lead['issues_count']} assigned issues."
    else:
        agent_line = ""

    # Notable events, falling back to a default note when nothing stands out.
    events = []
    if n_merged > 5:
        events.append(f"{n_merged} PRs merged — high integration velocity")
    if triage_shifts.get("high_priority_recommendations", 0) > 0:
        events.append("High-priority recommendations from loop introspection")
    if test_shifts.get("test_focused_cycles", 0) > 3:
        events.append("Strong test coverage focus")
    if not events:
        events.append("Regular development flow")

    return {
        "overall": mood,
        "description": blurb,
        "focus_areas": focus,
        "agent_summary": agent_line,
        "notable_events": events,
    }
|
|
||||||
|
|
||||||
|
|
||||||
# ── Narrative Generation ───────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def generate_narrative(
    cycles_data: dict,
    issues_data: dict,
    prs_data: dict,
    triage_data: dict,
    themes: dict,
    agent_contrib: dict,
    test_shifts: dict,
    triage_shifts: dict,
    token_data: dict,
    since: datetime,
    until: datetime,
) -> dict:
    """Generate the complete weekly narrative.

    Combines the collected activity data and analysis results with the
    generated "vibe" summary into one JSON-serialisable dict.

    Args:
        cycles_data: Cycle totals/success stats for the period.
        issues_data: Issue touched/closed/opened counts for the period.
        prs_data: PR touched/merged/opened counts for the period.
        triage_data: Triage collector output (currently unused here beyond
            what triage_shifts already summarises; kept for interface parity).
        themes: Output of extract_themes.
        agent_contrib: Output of extract_agent_contributions.
        test_shifts: Output of analyze_test_shifts.
        triage_shifts: Output of analyze_triage_shifts.
        token_data: Output of collect_token_data.
        since: Start of the reporting period (inclusive).
        until: End of the reporting period.

    Returns:
        The full narrative dict (period, vibe, activity, themes, agents,
        test/triage health, token economy).
    """
    vibe = generate_vibe_summary(
        cycles_data, issues_data, prs_data, themes, agent_contrib, test_shifts, triage_shifts
    )

    return {
        "generated_at": datetime.now(UTC).isoformat(),
        "period": {
            "start": since.isoformat(),
            "end": until.isoformat(),
            # Bug fix: was hard-coded to 7 even when the caller overrode the
            # lookback window (--days); derive it from the actual period.
            "days": (until - since).days,
        },
        "vibe": vibe,
        "activity": {
            "cycles": {
                "total": cycles_data.get("total", 0),
                "successes": cycles_data.get("successes", 0),
                "failures": cycles_data.get("failures", 0),
                "success_rate": cycles_data.get("success_rate", 0),
            },
            "issues": {
                "touched": issues_data.get("touched_count", 0),
                "closed": issues_data.get("closed_count", 0),
                "opened": issues_data.get("opened_count", 0),
            },
            "pull_requests": {
                "touched": prs_data.get("touched_count", 0),
                "merged": prs_data.get("merged_count", 0),
                "opened": prs_data.get("opened_count", 0),
            },
        },
        "themes": themes,
        "agents": agent_contrib,
        "test_health": test_shifts,
        "triage_health": triage_shifts,
        "token_economy": token_data,
    }
|
|
||||||
|
|
||||||
|
|
||||||
def generate_markdown_summary(narrative: dict) -> str:
    """Render the narrative dict as a human-readable markdown report."""
    vibe = narrative.get("vibe", {})
    activity = narrative.get("activity", {})
    cyc = activity.get("cycles", {})
    iss = activity.get("issues", {})
    pr = activity.get("pull_requests", {})

    # Fixed header and activity section.
    out: list[str] = [
        "# Weekly Narrative Summary",
        "",
        f"**Period:** {narrative['period']['start'][:10]} to {narrative['period']['end'][:10]}",
        f"**Vibe:** {vibe.get('overall', 'unknown').title()}",
        "",
        f"{vibe.get('description', '')}",
        "",
        "## Activity Highlights",
        "",
        f"- **Development Cycles:** {cyc.get('total', 0)} total ({cyc.get('successes', 0)} success, {cyc.get('failures', 0)} failure)",
        f"- **Issues:** {iss.get('closed', 0)} closed, {iss.get('opened', 0)} opened",
        f"- **Pull Requests:** {pr.get('merged', 0)} merged, {pr.get('opened', 0)} opened",
        "",
    ]

    # Focus areas (only when present).
    focus = vibe.get("focus_areas", [])
    if focus:
        out += ["## Focus Areas", ""]
        out += [f"- {area}" for area in focus]
        out.append("")

    # Agent contributions (only when present).
    summary = vibe.get("agent_summary", "")
    if summary:
        out += ["## Agent Activity", "", summary, ""]

    # Notable events (only when present).
    events = vibe.get("notable_events", [])
    if events:
        out += ["## Notable Events", ""]
        out += [f"- {event}" for event in events]
        out.append("")

    # Triage warning block, only when introspection flagged anything.
    triage = narrative.get("triage_health", {})
    if triage.get("high_priority_recommendations", 0) > 0:
        out += [
            "## Triage Notes",
            "",
            f"⚠️ {triage['high_priority_recommendations']} high-priority recommendation(s) from loop introspection.",
            "",
        ]
        out += [
            f"- **{rec.get('category', 'general')}:** {rec.get('finding', '')}"
            for rec in triage.get("recent_recommendations", [])[:2]
        ]
        out.append("")

    return "\n".join(out)
|
|
||||||
|
|
||||||
|
|
||||||
# ── Main ───────────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the weekly narrative generator."""
    parser = argparse.ArgumentParser(
        description="Generate weekly narrative summary of work and vibes",
    )
    parser.add_argument(
        "--json", "-j",
        action="store_true",
        help="Output as JSON instead of markdown",
    )
    parser.add_argument(
        "--output", "-o",
        type=str,
        default=None,
        help="Output file path (default from config)",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=None,
        help="Override lookback days (default 7)",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Run even if disabled in config",
    )
    return parser.parse_args()
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> int:
    """Entry point: collect, analyze, render, and write the weekly narrative.

    Returns:
        Process exit code (always 0; a disabled config is a silent skip).
    """
    args = parse_args()
    config = load_automation_config()

    # Respect the config kill-switch unless --force is given.
    if not config.get("enabled", True) and not args.force:
        print("[weekly_narrative] Skipped — weekly narrative is disabled in config")
        print("[weekly_narrative] Use --force to run anyway")
        return 0

    # Determine lookback period (CLI override wins over config).
    days = args.days if args.days is not None else config.get("lookback_days", 7)
    until = datetime.now(UTC)
    since = until - timedelta(days=days)

    print(f"[weekly_narrative] Generating narrative for the past {days} days...")

    # Setup Gitea client; collectors degrade gracefully without the API.
    token = get_token(config)
    client = GiteaClient(config, token)

    if not client.is_available():
        print("[weekly_narrative] Warning: Gitea API unavailable — will use local data only")

    # Collect raw activity data for the period.
    cycles_data = collect_cycles_data(since)
    issues_data = collect_issues_data(client, since)
    prs_data = collect_prs_data(client, since)
    triage_data = collect_triage_data(since)
    token_data = collect_token_data(since)

    # Analyze the collected data.
    themes = extract_themes(issues_data.get("issues", []))
    agent_contrib = extract_agent_contributions(
        issues_data.get("issues", []),
        prs_data.get("prs", []),
        cycles_data.get("cycles", []),
    )
    test_shifts = analyze_test_shifts(cycles_data.get("cycles", []))
    triage_shifts = analyze_triage_shifts(triage_data)

    # Generate narrative
    narrative = generate_narrative(
        cycles_data,
        issues_data,
        prs_data,
        triage_data,
        themes,
        agent_contrib,
        test_shifts,
        triage_shifts,
        token_data,
        since,
        until,
    )

    # Determine output path (CLI override wins over config).
    output_path = args.output or config.get("output_file", ".loop/weekly_narrative.json")
    output_file = REPO_ROOT / output_path
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # Write JSON output
    output_file.write_text(json.dumps(narrative, indent=2) + "\n")

    # Render markdown once; it is both written alongside the JSON and
    # (when --json was not requested) echoed to stdout.  The original
    # rendered it twice.
    markdown = generate_markdown_summary(narrative)
    md_output_file = output_file.with_suffix(".md")
    md_output_file.write_text(markdown)

    # Print output
    if args.json:
        print(json.dumps(narrative, indent=2))
    else:
        print()
        print(markdown)

    print(f"\n[weekly_narrative] Written to: {output_file}")
    print(f"[weekly_narrative] Markdown summary: {md_output_file}")

    return 0
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())
|
|
||||||
Reference in New Issue
Block a user