diff --git a/docs/BACKLOG_TRIAGE_2026-03-23.md b/docs/BACKLOG_TRIAGE_2026-03-23.md new file mode 100644 index 00000000..08516ba5 --- /dev/null +++ b/docs/BACKLOG_TRIAGE_2026-03-23.md @@ -0,0 +1,91 @@ +# Deep Backlog Triage — Harness vs Infrastructure Separation + +**Date:** March 23, 2026 +**Analyst:** Perplexity Computer +**Executor:** Claude (Opus 4.6) +**Issue:** #1076 + +--- + +## Summary of Actions Taken + +### 1. Batch Closed: 17 Rejected-Direction Issues + +OpenClaw rejected direction + superseded autoresearch: +#663, #722, #723, #724, #725, #726, #727, #728, #729, #730, #731, +#903, #904, #911, #926, #927, #950 + +All labeled `rejected-direction`. + +### 2. Closed: 2 Duplicate Issues + +- #867 — duplicate of #887 (Morrowind feasibility study) +- #916 — duplicate of #931 (test_setup_script.py fixes) + +Both labeled `duplicate`. + +### 3. Labels Created + +| Label | Color | Purpose | +|-------|-------|---------| +| `harness` | Red | Core product: agent framework | +| `infrastructure` | Blue | Supporting stage: dashboard, CI/CD | +| `p0-critical` | Red | Must fix now | +| `p1-important` | Orange | Next sprint | +| `p2-backlog` | Gold | When time permits | +| `rejected-direction` | Gray | Closed: rejected/superseded | +| `duplicate` | Light gray | Duplicate of another issue | +| `gemini-review` | Purple | Auto-generated, needs review | +| `consolidation` | Green | Part of a consolidation epic | +| `morrowind` | Brown | Harness: Morrowind embodiment | +| `heartbeat` | Crimson | Harness: Agent heartbeat loop | +| `inference` | Orange-red | Harness: Inference/model routing | +| `sovereignty` | Indigo | Harness: Sovereignty stack | +| `memory-session` | Teal | Harness: Memory/session | +| `deprioritized` | Dark gray | Not blocking P0 work | + +### 4. Consolidation Epics Created + +- **#1077** — [EPIC] Kimi-Tasks Code Hygiene (14 issues consolidated) +- **#1078** — [EPIC] ASCII Video Showcase (6 issues consolidated) + +### 5. 
Labels Applied + +- **P0 Heartbeat** — 16 issues labeled `harness` + `p0-critical` + `heartbeat` +- **P0 Inference** — 10 issues labeled `harness` + `p0-critical` + `inference` +- **P0 Memory/Session** — 3 issues labeled `harness` + `p0-critical` + `memory-session` +- **P1 Morrowind** — 63 issues labeled `harness` + `p1-important` + `morrowind` +- **P1 Sovereignty** — 11 issues labeled `harness` + `p1-important` + `sovereignty` +- **P1 SOUL/Persona** — 2 issues labeled `harness` + `p1-important` +- **P1 Testing** — 4 issues labeled `harness` + `p1-important` +- **P2 LHF** — 3 issues labeled `harness` + `p2-backlog` +- **P2 Whitestone** — 9 issues labeled `harness` + `p2-backlog` +- **Infrastructure** — 36 issues labeled `infrastructure` + `deprioritized` +- **Philosophy** — 44 issues labeled `philosophy` +- **Gemini Review** — 15 issues labeled `gemini-review` +- **Consolidation** — 20 issues labeled `consolidation` + +### 6. Gemini Issues (15) — Tagged for Review + +#577, #578, #579, #1006, #1007, #1008, #1009, #1010, #1012, #1013, +#1014, #1016, #1017, #1018, #1019 + +Labeled `gemini-review` for human review of alignment with harness-first strategy. + +--- + +## Domain Breakdown + +| Domain | Count | % | +|--------|-------|---| +| **HARNESS (The Product)** | 219 | 75% | +| **INFRASTRUCTURE (The Stage)** | 39 | 13% | +| **CLOSE: Rejected Direction** | 17 | 6% | +| **UNCATEGORIZED** | 18 | 6% | + +## P0 Priority Stack (Harness) + +1. **Heartbeat v2** — Agent loop + WorldInterface (PR #900) +2. **Inference Cascade** — Local model routing (#966, #1064-#1069, #1075) +3. **Session Crystallization** — Memory/handoff (#982, #983-#986) +4. 
**Perception Pipeline** — Game state extraction (#963-#965, #1008) diff --git a/src/infrastructure/guards/moderation.py b/src/infrastructure/guards/moderation.py index 7af53c24..d157f056 100644 --- a/src/infrastructure/guards/moderation.py +++ b/src/infrastructure/guards/moderation.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) class ModerationVerdict(Enum): """Result of a moderation check.""" - PASS = "pass" + PASS = "pass" # noqa: S105 FAIL = "fail" ERROR = "error" @@ -285,9 +285,7 @@ class ContentModerator: cleaned = pattern.sub("[GAME_TERM]", cleaned) return cleaned - async def _run_guard( - self, text: str, profile: GameProfile - ) -> ModerationResult: + async def _run_guard(self, text: str, profile: GameProfile) -> ModerationResult: """Layer 2: Run LLM guard model or fall back to regex.""" if not settings.moderation_enabled: return ModerationResult( @@ -326,8 +324,7 @@ class ContentModerator: data = await resp.json() models = [m.get("name", "") for m in data.get("models", [])] self._guard_available = any( - self._guard_model in m or m.startswith(self._guard_model) - for m in models + self._guard_model in m or m.startswith(self._guard_model) for m in models ) if not self._guard_available: logger.info( diff --git a/src/infrastructure/sovereignty_metrics.py b/src/infrastructure/sovereignty_metrics.py index a305fa65..ceeb2cbb 100644 --- a/src/infrastructure/sovereignty_metrics.py +++ b/src/infrastructure/sovereignty_metrics.py @@ -242,8 +242,7 @@ class SovereigntyMetricsStore: ).fetchall() else: rows = conn.execute( - "SELECT * FROM sovereignty_alerts " - "ORDER BY timestamp DESC LIMIT ?", + "SELECT * FROM sovereignty_alerts ORDER BY timestamp DESC LIMIT ?", (limit,), ).fetchall() return [dict(row) for row in rows] diff --git a/src/timmy/agentic_loop.py b/src/timmy/agentic_loop.py index e13f404b..14b52bf2 100644 --- a/src/timmy/agentic_loop.py +++ b/src/timmy/agentic_loop.py @@ -215,6 +215,119 @@ def _summarize(result: AgenticResult, total_steps: int, 
was_truncated: bool) -> result.status = "completed" +# --------------------------------------------------------------------------- +# Execution orchestrator +# --------------------------------------------------------------------------- + + +async def _execute_all_steps( + agent, + task: str, + task_id: str, + steps: list[str], + total_steps: int, + session_id: str, + result: AgenticResult, + on_progress: Callable | None, +) -> list[str]: + """Execute all planned steps, handling failures with adaptation. + + Appends AgenticStep objects to *result.steps* and returns the list + of completed-result strings (used as context for later steps). + """ + completed_results: list[str] = [] + + for i, step_desc in enumerate(steps, 1): + step_start = time.monotonic() + try: + step = await _execute_step( + agent, + task, + step_desc, + i, + total_steps, + completed_results, + session_id, + ) + result.steps.append(step) + completed_results.append(f"Step {i}: {step.result[:200]}") + await _broadcast_progress( + "agentic.step_complete", + { + "task_id": task_id, + "step": i, + "total": total_steps, + "description": step_desc, + "result": step.result[:200], + }, + ) + if on_progress: + await on_progress(step_desc, i, total_steps) + + except Exception as exc: # broad catch intentional: agent.run can raise any error + logger.warning("Agentic loop step %d failed: %s", i, exc) + await _handle_step_failure( + agent, + step_desc, + i, + total_steps, + task_id, + exc, + step_start, + session_id, + result, + completed_results, + on_progress, + ) + + return completed_results + + +async def _handle_step_failure( + agent, + step_desc: str, + step_num: int, + total_steps: int, + task_id: str, + exc: Exception, + step_start: float, + session_id: str, + result: AgenticResult, + completed_results: list[str], + on_progress: Callable | None, +) -> None: + """Try to adapt a failed step; record a hard failure if adaptation also fails.""" + try: + step = await _adapt_step(agent, step_desc, 
step_num, exc, step_start, session_id) + result.steps.append(step) + completed_results.append(f"Step {step_num} (adapted): {step.result[:200]}") + await _broadcast_progress( + "agentic.step_adapted", + { + "task_id": task_id, + "step": step_num, + "total": total_steps, + "description": step_desc, + "error": str(exc), + "adaptation": step.result[:200], + }, + ) + if on_progress: + await on_progress(f"[Adapted] {step_desc}", step_num, total_steps) + except Exception as adapt_exc: # broad catch intentional + logger.error("Agentic loop adaptation also failed: %s", adapt_exc) + result.steps.append( + AgenticStep( + step_num=step_num, + description=step_desc, + result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}", + status="failed", + duration_ms=int((time.monotonic() - step_start) * 1000), + ) + ) + completed_results.append(f"Step {step_num}: FAILED") + + # --------------------------------------------------------------------------- # Core loop # --------------------------------------------------------------------------- @@ -265,65 +378,9 @@ async def run_agentic_loop( ) # Phase 2: Execution - completed_results: list[str] = [] - for i, step_desc in enumerate(steps, 1): - step_start = time.monotonic() - try: - step = await _execute_step( - agent, - task, - step_desc, - i, - total_steps, - completed_results, - session_id, - ) - result.steps.append(step) - completed_results.append(f"Step {i}: {step.result[:200]}") - await _broadcast_progress( - "agentic.step_complete", - { - "task_id": task_id, - "step": i, - "total": total_steps, - "description": step_desc, - "result": step.result[:200], - }, - ) - if on_progress: - await on_progress(step_desc, i, total_steps) - - except Exception as exc: # broad catch intentional: agent.run can raise any error - logger.warning("Agentic loop step %d failed: %s", i, exc) - try: - step = await _adapt_step(agent, step_desc, i, exc, step_start, session_id) - result.steps.append(step) - completed_results.append(f"Step {i} (adapted): 
{step.result[:200]}") - await _broadcast_progress( - "agentic.step_adapted", - { - "task_id": task_id, - "step": i, - "total": total_steps, - "description": step_desc, - "error": str(exc), - "adaptation": step.result[:200], - }, - ) - if on_progress: - await on_progress(f"[Adapted] {step_desc}", i, total_steps) - except Exception as adapt_exc: # broad catch intentional - logger.error("Agentic loop adaptation also failed: %s", adapt_exc) - result.steps.append( - AgenticStep( - step_num=i, - description=step_desc, - result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}", - status="failed", - duration_ms=int((time.monotonic() - step_start) * 1000), - ) - ) - completed_results.append(f"Step {i}: FAILED") + await _execute_all_steps( + agent, task, task_id, steps, total_steps, session_id, result, on_progress + ) # Phase 3: Summary _summarize(result, total_steps, was_truncated) diff --git a/tests/infrastructure/test_moderation.py b/tests/infrastructure/test_moderation.py index 9ac59129..45443b5e 100644 --- a/tests/infrastructure/test_moderation.py +++ b/tests/infrastructure/test_moderation.py @@ -193,9 +193,7 @@ class TestContentModerator: layer="llama_guard", category=ViolationCategory.VIOLENCE_GLORIFICATION, ) - with patch.object( - mod, "_run_guard", new_callable=AsyncMock, return_value=low_conf_result - ): + with patch.object(mod, "_run_guard", new_callable=AsyncMock, return_value=low_conf_result): result = await mod.check("sword fight scene", game="morrowind") assert result.passed assert not result.blocked @@ -212,9 +210,7 @@ class TestContentModerator: layer="llama_guard", category=ViolationCategory.REAL_WORLD_HARM, ) - with patch.object( - mod, "_run_guard", new_callable=AsyncMock, return_value=high_conf_result - ): + with patch.object(mod, "_run_guard", new_callable=AsyncMock, return_value=high_conf_result): result = await mod.check("harmful content", game="morrowind") assert result.blocked @@ -229,9 +225,7 @@ class TestContentModerator: def 
test_regex_passes_game_violence(self): """Regex should not flag in-game violence narration.""" mod = self._make_moderator() - result = mod._check_with_regex( - "The warrior slays the dragon with a mighty blow." - ) + result = mod._check_with_regex("The warrior slays the dragon with a mighty blow.") assert result.passed def test_regex_passes_normal_narration(self): @@ -261,10 +255,14 @@ class TestContentModerator: async def test_guard_fallback_on_error(self): """Should fall back to regex when guard model errors.""" mod = self._make_moderator() - with patch.object( - mod, "_is_guard_available", new_callable=AsyncMock, return_value=True - ), patch.object( - mod, "_check_with_guard", new_callable=AsyncMock, side_effect=RuntimeError("timeout") + with ( + patch.object(mod, "_is_guard_available", new_callable=AsyncMock, return_value=True), + patch.object( + mod, + "_check_with_guard", + new_callable=AsyncMock, + side_effect=RuntimeError("timeout"), + ), ): result = await mod.check("safe text", game="default") # Should fall back to regex and pass diff --git a/tests/infrastructure/test_sovereignty_metrics.py b/tests/infrastructure/test_sovereignty_metrics.py index 8acb4a0a..d907ea3e 100644 --- a/tests/infrastructure/test_sovereignty_metrics.py +++ b/tests/infrastructure/test_sovereignty_metrics.py @@ -132,7 +132,13 @@ class TestSovereigntyMetricsStore: def test_graduation_targets_complete(self): """All expected metric types have graduation targets.""" - expected = {"cache_hit_rate", "api_cost", "time_to_report", "human_involvement", "local_artifacts"} + expected = { + "cache_hit_rate", + "api_cost", + "time_to_report", + "human_involvement", + "local_artifacts", + } assert set(GRADUATION_TARGETS.keys()) == expected