From 7bc355eed6e28e7c641d75ce9d411e709e8a66ad Mon Sep 17 00:00:00 2001 From: hermes Date: Sun, 15 Mar 2026 14:50:09 -0400 Subject: [PATCH] [loop-cycle-61] fix: strip think tags and harden fact parsing (#237) (#254) --- src/timmy/thinking.py | 48 +++++++++++++++--- tests/timmy/test_thinking.py | 95 ++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/src/timmy/thinking.py b/src/timmy/thinking.py index e39a80d..c687567 100644 --- a/src/timmy/thinking.py +++ b/src/timmy/thinking.py @@ -19,6 +19,7 @@ Usage:: import logging import random +import re import sqlite3 import uuid from collections.abc import Generator @@ -35,6 +36,9 @@ logger = logging.getLogger(__name__) _DEFAULT_DB = Path("data/thoughts.db") +# qwen3 and other reasoning models wrap chain-of-thought in tags +_THINK_TAG_RE = re.compile(r".*?\s*", re.DOTALL) + # Sensitive patterns that must never be stored as facts _SENSITIVE_PATTERNS = [ "token", @@ -435,6 +439,9 @@ class ThinkingEngine: def _parse_facts_response(self, raw: str) -> list[str]: """Parse JSON array from LLM response, stripping markdown fences. + Resilient to models that prepend reasoning text or wrap the array in + prose. Finds the first ``[...]`` block and parses that. + Args: raw: Raw response string from the LLM. @@ -447,14 +454,39 @@ class ThinkingEngine: import json cleaned = raw.strip() + + # Strip markdown code fences if cleaned.startswith("```"): cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip() - facts = json.loads(cleaned) - if not isinstance(facts, list): - return [] + # Try direct parse first (fast path) + try: + facts = json.loads(cleaned) + if isinstance(facts, list): + return [f for f in facts if isinstance(f, str)] + except (json.JSONDecodeError, ValueError): + pass - return [f for f in facts if isinstance(f, str)] + # Fallback: extract first JSON array from the text + start = cleaned.find("[") + if start == -1: + return [] + # Walk to find the matching close bracket + depth = 0 + for i, ch in enumerate(cleaned[start:], start): + if ch == "[": + depth += 1 + elif ch == "]": + depth -= 1 + if depth == 0: + try: + facts = json.loads(cleaned[start : i + 1]) + if isinstance(facts, list): + return [f for f in facts if isinstance(f, str)] + except (json.JSONDecodeError, ValueError): + pass + break + return [] def _filter_and_store_facts(self, facts: list[str]) -> None: """Filter and store valid facts, blocking sensitive and meta content. @@ -498,7 +530,7 @@ class ThinkingEngine: if facts := self._parse_facts_response(raw): self._filter_and_store_facts(facts) except Exception as exc: - logger.debug("Thought distillation skipped: %s", exc) + logger.warning("Thought distillation failed: %s", exc) async def _maybe_file_issues(self) -> None: """Every N thoughts, classify recent thoughts and file Gitea issues. @@ -945,12 +977,16 @@ class ThinkingEngine: errors that occur when MCP stdio transports are spawned inside asyncio background tasks (#72). The thinking engine doesn't need Gitea or filesystem tools — it only needs the LLM. + + Strips ```` tags from reasoning models (qwen3, etc.) so that + downstream parsers (fact distillation, issue filing) receive clean text. """ from timmy.agent import create_timmy agent = create_timmy(skip_mcp=True) run = await agent.arun(prompt, stream=False) - return run.content if hasattr(run, "content") else str(run) + raw = run.content if hasattr(run, "content") else str(run) + return _THINK_TAG_RE.sub("", raw) if raw else raw def _store_thought(self, content: str, seed_type: str) -> Thought: """Persist a thought to SQLite.""" diff --git a/tests/timmy/test_thinking.py b/tests/timmy/test_thinking.py index b87e23f..5a0a7be 100644 --- a/tests/timmy/test_thinking.py +++ b/tests/timmy/test_thinking.py @@ -885,3 +885,98 @@ async def test_call_agent_does_not_use_session_chat(tmp_path): await engine._call_agent("prompt") mock_session_chat.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_call_agent_strips_think_tags(tmp_path): + """_call_agent must strip tags from reasoning models like qwen3.""" + engine = _make_engine(tmp_path) + + mock_agent = AsyncMock() + mock_run = AsyncMock() + mock_run.content = ( + "Let me reason about this carefully..." + "The actual thought content." + ) + mock_agent.arun.return_value = mock_run + + with patch("timmy.agent.create_timmy", return_value=mock_agent): + result = await engine._call_agent("test prompt") + + assert "" not in result + assert result == "The actual thought content." + + +@pytest.mark.asyncio +async def test_call_agent_strips_multiline_think_tags(tmp_path): + """_call_agent handles multi-line blocks.""" + engine = _make_engine(tmp_path) + + mock_agent = AsyncMock() + mock_run = AsyncMock() + mock_run.content = ( + "\nStep 1: analyze\nStep 2: synthesize\n\n" + "Clean output here." + ) + mock_agent.arun.return_value = mock_run + + with patch("timmy.agent.create_timmy", return_value=mock_agent): + result = await engine._call_agent("test prompt") + + assert "" not in result + assert result == "Clean output here." + + +# --------------------------------------------------------------------------- +# _parse_facts_response resilience (#237) +# --------------------------------------------------------------------------- + + +def test_parse_facts_clean_json(tmp_path): + """Direct JSON array should parse normally.""" + engine = _make_engine(tmp_path) + result = engine._parse_facts_response('["fact one", "fact two"]') + assert result == ["fact one", "fact two"] + + +def test_parse_facts_empty_array(tmp_path): + """Empty JSON array should return empty list.""" + engine = _make_engine(tmp_path) + assert engine._parse_facts_response("[]") == [] + + +def test_parse_facts_with_prose_prefix(tmp_path): + """JSON array preceded by prose should still parse (#237).""" + engine = _make_engine(tmp_path) + raw = 'Here are the facts:\n["Alexander prefers YAML", "Timmy runs locally"]' + result = engine._parse_facts_response(raw) + assert result == ["Alexander prefers YAML", "Timmy runs locally"] + + +def test_parse_facts_with_markdown_fences(tmp_path): + """JSON wrapped in markdown code fences should parse.""" + engine = _make_engine(tmp_path) + raw = '```json\n["fact in fences"]\n```' + result = engine._parse_facts_response(raw) + assert result == ["fact in fences"] + + +def test_parse_facts_filters_non_strings(tmp_path): + """Non-string entries in the array should be filtered out.""" + engine = _make_engine(tmp_path) + result = engine._parse_facts_response('[42, "valid fact", null, true]') + assert result == ["valid fact"] + + +def test_parse_facts_none_and_empty(tmp_path): + """None and empty input should return empty list.""" + engine = _make_engine(tmp_path) + assert engine._parse_facts_response(None) == [] + assert engine._parse_facts_response("") == [] + assert engine._parse_facts_response(" ") == [] + + +def test_parse_facts_invalid_json(tmp_path): + """Totally invalid text with no JSON array should return empty list.""" + engine = _make_engine(tmp_path) + assert engine._parse_facts_response("no json here at all") == []