Merge branch 'main' into claude/issue-937

2026-03-23 15:07:35 +00:00
parent dc0e6d2de7 af0963a8c7
commit 2ff51fb7ad
6 changed files with 229 additions and 81 deletions
--- a/docs/BACKLOG_TRIAGE_2026-03-23.md
+++ b/docs/BACKLOG_TRIAGE_2026-03-23.md
@@ -0,0 +1,91 @@
+# Deep Backlog Triage — Harness vs Infrastructure Separation
+
+**Date:** March 23, 2026
+**Analyst:** Perplexity Computer
+**Executor:** Claude (Opus 4.6)
+**Issue:** #1076
+
+---
+
+## Summary of Actions Taken
+
+### 1. Batch Closed: 17 Rejected-Direction Issues
+
+Issues from the rejected OpenClaw direction, plus superseded autoresearch issues:
+#663, #722, #723, #724, #725, #726, #727, #728, #729, #730, #731,
+#903, #904, #911, #926, #927, #950
+
+All labeled `rejected-direction`.
+
+### 2. Closed: 2 Duplicate Issues
+
+- #867 — duplicate of #887 (Morrowind feasibility study)
+- #916 — duplicate of #931 (test_setup_script.py fixes)
+
+Both labeled `duplicate`.
+
+### 3. Labels Created
+
+| Label | Color | Purpose |
+|-------|-------|---------|
+| `harness` | Red | Core product: agent framework |
+| `infrastructure` | Blue | Supporting stage: dashboard, CI/CD |
+| `p0-critical` | Red | Must fix now |
+| `p1-important` | Orange | Next sprint |
+| `p2-backlog` | Gold | When time permits |
+| `rejected-direction` | Gray | Closed: rejected/superseded |
+| `duplicate` | Light gray | Duplicate of another issue |
+| `gemini-review` | Purple | Auto-generated, needs review |
+| `consolidation` | Green | Part of a consolidation epic |
+| `morrowind` | Brown | Harness: Morrowind embodiment |
+| `heartbeat` | Crimson | Harness: Agent heartbeat loop |
+| `inference` | Orange-red | Harness: Inference/model routing |
+| `sovereignty` | Indigo | Harness: Sovereignty stack |
+| `memory-session` | Teal | Harness: Memory/session |
+| `deprioritized` | Dark gray | Not blocking P0 work |
+
+### 4. Consolidation Epics Created
+
+- **#1077** — [EPIC] Kimi-Tasks Code Hygiene (14 issues consolidated)
+- **#1078** — [EPIC] ASCII Video Showcase (6 issues consolidated)
+
+### 5. Labels Applied
+
+- **P0 Heartbeat** — 16 issues labeled `harness` + `p0-critical` + `heartbeat`
+- **P0 Inference** — 10 issues labeled `harness` + `p0-critical` + `inference`
+- **P0 Memory/Session** — 3 issues labeled `harness` + `p0-critical` + `memory-session`
+- **P1 Morrowind** — 63 issues labeled `harness` + `p1-important` + `morrowind`
+- **P1 Sovereignty** — 11 issues labeled `harness` + `p1-important` + `sovereignty`
+- **P1 SOUL/Persona** — 2 issues labeled `harness` + `p1-important`
+- **P1 Testing** — 4 issues labeled `harness` + `p1-important`
+- **P2 LHF** — 3 issues labeled `harness` + `p2-backlog`
+- **P2 Whitestone** — 9 issues labeled `harness` + `p2-backlog`
+- **Infrastructure** — 36 issues labeled `infrastructure` + `deprioritized`
+- **Philosophy** — 44 issues labeled `philosophy`
+- **Gemini Review** — 15 issues labeled `gemini-review`
+- **Consolidation** — 20 issues labeled `consolidation`
+
+### 6. Gemini Issues (15) — Tagged for Review
+
+#577, #578, #579, #1006, #1007, #1008, #1009, #1010, #1012, #1013,
+#1014, #1016, #1017, #1018, #1019
+
+Labeled `gemini-review` for human review of alignment with the harness-first strategy.
+
+---
+
+## Domain Breakdown
+
+| Domain | Count | % |
+|--------|-------|---|
+| **HARNESS (The Product)** | 219 | 75% |
+| **INFRASTRUCTURE (The Stage)** | 39 | 13% |
+| **CLOSE: Rejected Direction** | 17 | 6% |
+| **UNCATEGORIZED** | 18 | 6% |
+
+## P0 Priority Stack (Harness)
+
+1. **Heartbeat v2** — Agent loop + WorldInterface (PR #900)
+2. **Inference Cascade** — Local model routing (#966, #1064-#1069, #1075)
+3. **Session Crystallization** — Memory/handoff (#982, #983-#986)
+4. **Perception Pipeline** — Game state extraction (#963-#965, #1008)
--- a/src/infrastructure/guards/moderation.py
+++ b/src/infrastructure/guards/moderation.py
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
 class ModerationVerdict(Enum):
    """Result of a moderation check."""

-    PASS = "pass"
+    PASS = "pass"  # noqa: S105
    FAIL = "fail"
    ERROR = "error"

@@ -285,9 +285,7 @@ class ContentModerator:
            cleaned = pattern.sub("[GAME_TERM]", cleaned)
        return cleaned

-    async def _run_guard(
-        self, text: str, profile: GameProfile
-    ) -> ModerationResult:
+    async def _run_guard(self, text: str, profile: GameProfile) -> ModerationResult:
        """Layer 2: Run LLM guard model or fall back to regex."""
        if not settings.moderation_enabled:
            return ModerationResult(
@@ -326,8 +324,7 @@ class ContentModerator:
                    data = await resp.json()
                    models = [m.get("name", "") for m in data.get("models", [])]
                    self._guard_available = any(
-                        self._guard_model in m or m.startswith(self._guard_model)
-                        for m in models
+                        self._guard_model in m or m.startswith(self._guard_model) for m in models
                    )
                    if not self._guard_available:
                        logger.info(
--- a/src/infrastructure/sovereignty_metrics.py
+++ b/src/infrastructure/sovereignty_metrics.py
@@ -242,8 +242,7 @@ class SovereigntyMetricsStore:
                    ).fetchall()
                else:
                    rows = conn.execute(
-                        "SELECT * FROM sovereignty_alerts "
-                        "ORDER BY timestamp DESC LIMIT ?",
+                        "SELECT * FROM sovereignty_alerts ORDER BY timestamp DESC LIMIT ?",
                        (limit,),
                    ).fetchall()
                return [dict(row) for row in rows]
--- a/src/timmy/agentic_loop.py
+++ b/src/timmy/agentic_loop.py
@@ -215,6 +215,119 @@ def _summarize(result: AgenticResult, total_steps: int, was_truncated: bool) ->
        result.status = "completed"


+# ---------------------------------------------------------------------------
+# Execution orchestrator
+# ---------------------------------------------------------------------------
+
+
+async def _execute_all_steps(
+    agent,
+    task: str,
+    task_id: str,
+    steps: list[str],
+    total_steps: int,
+    session_id: str,
+    result: AgenticResult,
+    on_progress: Callable | None,
+) -> list[str]:
+    """Execute all planned steps, handling failures with adaptation.
+
+    Appends AgenticStep objects to *result.steps* and returns the list
+    of completed-result strings (used as context for later steps).
+    """
+    completed_results: list[str] = []
+
+    for i, step_desc in enumerate(steps, 1):  # step numbers are 1-based
+        step_start = time.monotonic()  # handed to the failure path for duration reporting
+        try:
+            step = await _execute_step(
+                agent,
+                task,
+                step_desc,
+                i,
+                total_steps,
+                completed_results,
+                session_id,
+            )
+            result.steps.append(step)
+            completed_results.append(f"Step {i}: {step.result[:200]}")
+            await _broadcast_progress(
+                "agentic.step_complete",
+                {
+                    "task_id": task_id,
+                    "step": i,
+                    "total": total_steps,
+                    "description": step_desc,
+                    "result": step.result[:200],
+                },
+            )
+            if on_progress:
+                await on_progress(step_desc, i, total_steps)
+
+        except Exception as exc:  # broad catch intentional: agent.run can raise any error
+            logger.warning("Agentic loop step %d failed: %s", i, exc)
+            await _handle_step_failure(  # returns None; it records the outcome itself
+                agent,
+                step_desc,
+                i,
+                total_steps,
+                task_id,
+                exc,
+                step_start,
+                session_id,
+                result,
+                completed_results,
+                on_progress,
+            )
+
+    return completed_results
+
+
+async def _handle_step_failure(
+    agent,
+    step_desc: str,
+    step_num: int,
+    total_steps: int,
+    task_id: str,
+    exc: Exception,  # the error raised by the original attempt at this step
+    step_start: float,  # time.monotonic() reading; used below to compute duration_ms
+    session_id: str,
+    result: AgenticResult,  # mutated in place: a step is appended on either path
+    completed_results: list[str],  # mutated in place: a summary string is appended
+    on_progress: Callable | None,
+) -> None:
+    """Try to adapt a failed step; record a hard failure if adaptation also fails."""
+    try:  # adaptation path: retry via _adapt_step, then record and announce the result
+        step = await _adapt_step(agent, step_desc, step_num, exc, step_start, session_id)
+        result.steps.append(step)
+        completed_results.append(f"Step {step_num} (adapted): {step.result[:200]}")
+        await _broadcast_progress(
+            "agentic.step_adapted",
+            {
+                "task_id": task_id,
+                "step": step_num,
+                "total": total_steps,
+                "description": step_desc,
+                "error": str(exc),
+                "adaptation": step.result[:200],  # capped at 200 chars for the payload
+            },
+        )
+        if on_progress:
+            await on_progress(f"[Adapted] {step_desc}", step_num, total_steps)
+    except Exception as adapt_exc:  # broad catch intentional; also traps broadcast/callback errors
+        logger.error("Agentic loop adaptation also failed: %s", adapt_exc)
+        result.steps.append(
+            AgenticStep(
+                step_num=step_num,
+                description=step_desc,
+                result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}",  # keeps both errors
+                status="failed",
+                duration_ms=int((time.monotonic() - step_start) * 1000),  # seconds -> ms
+            )
+        )
+        completed_results.append(f"Step {step_num}: FAILED")
+
+
 # ---------------------------------------------------------------------------
 # Core loop
 # ---------------------------------------------------------------------------
@@ -265,65 +378,9 @@ async def run_agentic_loop(
    )

    # Phase 2: Execution
-    completed_results: list[str] = []
-    for i, step_desc in enumerate(steps, 1):
-        step_start = time.monotonic()
-        try:
-            step = await _execute_step(
-                agent,
-                task,
-                step_desc,
-                i,
-                total_steps,
-                completed_results,
-                session_id,
-            )
-            result.steps.append(step)
-            completed_results.append(f"Step {i}: {step.result[:200]}")
-            await _broadcast_progress(
-                "agentic.step_complete",
-                {
-                    "task_id": task_id,
-                    "step": i,
-                    "total": total_steps,
-                    "description": step_desc,
-                    "result": step.result[:200],
-                },
-            )
-            if on_progress:
-                await on_progress(step_desc, i, total_steps)
-
-        except Exception as exc:  # broad catch intentional: agent.run can raise any error
-            logger.warning("Agentic loop step %d failed: %s", i, exc)
-            try:
-                step = await _adapt_step(agent, step_desc, i, exc, step_start, session_id)
-                result.steps.append(step)
-                completed_results.append(f"Step {i} (adapted): {step.result[:200]}")
-                await _broadcast_progress(
-                    "agentic.step_adapted",
-                    {
-                        "task_id": task_id,
-                        "step": i,
-                        "total": total_steps,
-                        "description": step_desc,
-                        "error": str(exc),
-                        "adaptation": step.result[:200],
-                    },
-                )
-                if on_progress:
-                    await on_progress(f"[Adapted] {step_desc}", i, total_steps)
-            except Exception as adapt_exc:  # broad catch intentional
-                logger.error("Agentic loop adaptation also failed: %s", adapt_exc)
-                result.steps.append(
-                    AgenticStep(
-                        step_num=i,
-                        description=step_desc,
-                        result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}",
-                        status="failed",
-                        duration_ms=int((time.monotonic() - step_start) * 1000),
-                    )
-                )
-                completed_results.append(f"Step {i}: FAILED")
+    await _execute_all_steps(
+        agent, task, task_id, steps, total_steps, session_id, result, on_progress
+    )

    # Phase 3: Summary
    _summarize(result, total_steps, was_truncated)
--- a/tests/infrastructure/test_moderation.py
+++ b/tests/infrastructure/test_moderation.py
@@ -193,9 +193,7 @@ class TestContentModerator:
            layer="llama_guard",
            category=ViolationCategory.VIOLENCE_GLORIFICATION,
        )
-        with patch.object(
-            mod, "_run_guard", new_callable=AsyncMock, return_value=low_conf_result
-        ):
+        with patch.object(mod, "_run_guard", new_callable=AsyncMock, return_value=low_conf_result):
            result = await mod.check("sword fight scene", game="morrowind")
        assert result.passed
        assert not result.blocked
@@ -212,9 +210,7 @@ class TestContentModerator:
            layer="llama_guard",
            category=ViolationCategory.REAL_WORLD_HARM,
        )
-        with patch.object(
-            mod, "_run_guard", new_callable=AsyncMock, return_value=high_conf_result
-        ):
+        with patch.object(mod, "_run_guard", new_callable=AsyncMock, return_value=high_conf_result):
            result = await mod.check("harmful content", game="morrowind")
        assert result.blocked

@@ -229,9 +225,7 @@ class TestContentModerator:
    def test_regex_passes_game_violence(self):
        """Regex should not flag in-game violence narration."""
        mod = self._make_moderator()
-        result = mod._check_with_regex(
-            "The warrior slays the dragon with a mighty blow."
-        )
+        result = mod._check_with_regex("The warrior slays the dragon with a mighty blow.")
        assert result.passed

    def test_regex_passes_normal_narration(self):
@@ -261,10 +255,14 @@ class TestContentModerator:
    async def test_guard_fallback_on_error(self):
        """Should fall back to regex when guard model errors."""
        mod = self._make_moderator()
-        with patch.object(
-            mod, "_is_guard_available", new_callable=AsyncMock, return_value=True
-        ), patch.object(
-            mod, "_check_with_guard", new_callable=AsyncMock, side_effect=RuntimeError("timeout")
+        with (
+            patch.object(mod, "_is_guard_available", new_callable=AsyncMock, return_value=True),
+            patch.object(
+                mod,
+                "_check_with_guard",
+                new_callable=AsyncMock,
+                side_effect=RuntimeError("timeout"),
+            ),
        ):
            result = await mod.check("safe text", game="default")
        # Should fall back to regex and pass
--- a/tests/infrastructure/test_sovereignty_metrics.py
+++ b/tests/infrastructure/test_sovereignty_metrics.py
@@ -132,7 +132,13 @@ class TestSovereigntyMetricsStore:

    def test_graduation_targets_complete(self):
        """All expected metric types have graduation targets."""
-        expected = {"cache_hit_rate", "api_cost", "time_to_report", "human_involvement", "local_artifacts"}
+        expected = {
+            "cache_hit_rate",
+            "api_cost",
+            "time_to_report",
+            "human_involvement",
+            "local_artifacts",
+        }
        assert set(GRADUATION_TARGETS.keys()) == expected