[loop-cycle-61] fix: strip think tags and harden fact parsing (#237) (#254)

2026-03-15 14:50:09 -04:00
parent f9911c002c
commit 7bc355eed6
2 changed files with 137 additions and 6 deletions
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -19,6 +19,7 @@ Usage::

 import logging
 import random
+import re
 import sqlite3
 import uuid
 from collections.abc import Generator
@@ -35,6 +36,9 @@ logger = logging.getLogger(__name__)

 _DEFAULT_DB = Path("data/thoughts.db")

+# qwen3 and other reasoning models wrap chain-of-thought in <think> tags
+_THINK_TAG_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
+
 # Sensitive patterns that must never be stored as facts
 _SENSITIVE_PATTERNS = [
    "token",
@@ -435,6 +439,9 @@ class ThinkingEngine:
    def _parse_facts_response(self, raw: str) -> list[str]:
        """Parse JSON array from LLM response, stripping markdown fences.

+        Resilient to models that prepend reasoning text or wrap the array in
+        prose.  Finds the first ``[...]`` block and parses that.
+
        Args:
            raw: Raw response string from the LLM.

@@ -447,14 +454,39 @@ class ThinkingEngine:
        import json

        cleaned = raw.strip()
+
+        # Strip markdown code fences
        if cleaned.startswith("```"):
            cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()

-        facts = json.loads(cleaned)
-        if not isinstance(facts, list):
-            return []
+        # Try direct parse first (fast path)
+        try:
+            facts = json.loads(cleaned)
+            if isinstance(facts, list):
+                return [f for f in facts if isinstance(f, str)]
+        except (json.JSONDecodeError, ValueError):
+            pass

-        return [f for f in facts if isinstance(f, str)]
+        # Fallback: extract first JSON array from the text
+        start = cleaned.find("[")
+        if start == -1:
+            return []
+        # Walk to find the matching close bracket
+        depth = 0
+        for i, ch in enumerate(cleaned[start:], start):
+            if ch == "[":
+                depth += 1
+            elif ch == "]":
+                depth -= 1
+                if depth == 0:
+                    try:
+                        facts = json.loads(cleaned[start : i + 1])
+                        if isinstance(facts, list):
+                            return [f for f in facts if isinstance(f, str)]
+                    except (json.JSONDecodeError, ValueError):
+                        pass
+                    break
+        return []

    def _filter_and_store_facts(self, facts: list[str]) -> None:
        """Filter and store valid facts, blocking sensitive and meta content.
@@ -498,7 +530,7 @@ class ThinkingEngine:
            if facts := self._parse_facts_response(raw):
                self._filter_and_store_facts(facts)
        except Exception as exc:
-            logger.debug("Thought distillation skipped: %s", exc)
+            logger.warning("Thought distillation failed: %s", exc)

    async def _maybe_file_issues(self) -> None:
        """Every N thoughts, classify recent thoughts and file Gitea issues.
@@ -945,12 +977,16 @@ class ThinkingEngine:
        errors that occur when MCP stdio transports are spawned inside asyncio
        background tasks (#72).  The thinking engine doesn't need Gitea or
        filesystem tools — it only needs the LLM.
+
+        Strips ``<think>`` tags from reasoning models (qwen3, etc.) so that
+        downstream parsers (fact distillation, issue filing) receive clean text.
        """
        from timmy.agent import create_timmy

        agent = create_timmy(skip_mcp=True)
        run = await agent.arun(prompt, stream=False)
-        return run.content if hasattr(run, "content") else str(run)
+        raw = run.content if hasattr(run, "content") else str(run)
+        return _THINK_TAG_RE.sub("", raw) if raw else raw

    def _store_thought(self, content: str, seed_type: str) -> Thought:
        """Persist a thought to SQLite."""