[loop-cycle-61] fix: strip think tags and harden fact parsing (#237) (#254)
Some checks failed
Tests / lint (push) Failing after 3s
Tests / test (push) Has been skipped

This commit was merged in pull request #254.
This commit is contained in:
2026-03-15 14:50:09 -04:00
parent f9911c002c
commit 7bc355eed6
2 changed files with 137 additions and 6 deletions

View File

@@ -19,6 +19,7 @@ Usage::
import logging
import random
import re
import sqlite3
import uuid
from collections.abc import Generator
@@ -35,6 +36,9 @@ logger = logging.getLogger(__name__)
_DEFAULT_DB = Path("data/thoughts.db")
# qwen3 and other reasoning models wrap chain-of-thought in <think> tags
_THINK_TAG_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
# Sensitive patterns that must never be stored as facts
_SENSITIVE_PATTERNS = [
"token",
@@ -435,6 +439,9 @@ class ThinkingEngine:
def _parse_facts_response(self, raw: str) -> list[str]:
"""Parse JSON array from LLM response, stripping markdown fences.
Resilient to models that prepend reasoning text or wrap the array in
prose. Finds the first ``[...]`` block and parses that.
Args:
raw: Raw response string from the LLM.
@@ -447,14 +454,39 @@ class ThinkingEngine:
import json
cleaned = raw.strip()
# Strip markdown code fences
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
facts = json.loads(cleaned)
if not isinstance(facts, list):
return []
# Try direct parse first (fast path)
try:
facts = json.loads(cleaned)
if isinstance(facts, list):
return [f for f in facts if isinstance(f, str)]
except (json.JSONDecodeError, ValueError):
pass
return [f for f in facts if isinstance(f, str)]
# Fallback: extract first JSON array from the text
start = cleaned.find("[")
if start == -1:
return []
# Walk to find the matching close bracket
depth = 0
for i, ch in enumerate(cleaned[start:], start):
if ch == "[":
depth += 1
elif ch == "]":
depth -= 1
if depth == 0:
try:
facts = json.loads(cleaned[start : i + 1])
if isinstance(facts, list):
return [f for f in facts if isinstance(f, str)]
except (json.JSONDecodeError, ValueError):
pass
break
return []
def _filter_and_store_facts(self, facts: list[str]) -> None:
"""Filter and store valid facts, blocking sensitive and meta content.
@@ -498,7 +530,7 @@ class ThinkingEngine:
if facts := self._parse_facts_response(raw):
self._filter_and_store_facts(facts)
except Exception as exc:
logger.debug("Thought distillation skipped: %s", exc)
logger.warning("Thought distillation failed: %s", exc)
async def _maybe_file_issues(self) -> None:
"""Every N thoughts, classify recent thoughts and file Gitea issues.
@@ -945,12 +977,16 @@ class ThinkingEngine:
errors that occur when MCP stdio transports are spawned inside asyncio
background tasks (#72). The thinking engine doesn't need Gitea or
filesystem tools — it only needs the LLM.
Strips ``<think>`` tags from reasoning models (qwen3, etc.) so that
downstream parsers (fact distillation, issue filing) receive clean text.
"""
from timmy.agent import create_timmy
agent = create_timmy(skip_mcp=True)
run = await agent.arun(prompt, stream=False)
return run.content if hasattr(run, "content") else str(run)
raw = run.content if hasattr(run, "content") else str(run)
return _THINK_TAG_RE.sub("", raw) if raw else raw
def _store_thought(self, content: str, seed_type: str) -> Thought:
"""Persist a thought to SQLite."""

View File

@@ -885,3 +885,98 @@ async def test_call_agent_does_not_use_session_chat(tmp_path):
await engine._call_agent("prompt")
mock_session_chat.assert_not_awaited()
@pytest.mark.asyncio
async def test_call_agent_strips_think_tags(tmp_path):
"""_call_agent must strip <think> tags from reasoning models like qwen3."""
engine = _make_engine(tmp_path)
mock_agent = AsyncMock()
mock_run = AsyncMock()
mock_run.content = (
"<think>Let me reason about this carefully...</think>"
"The actual thought content."
)
mock_agent.arun.return_value = mock_run
with patch("timmy.agent.create_timmy", return_value=mock_agent):
result = await engine._call_agent("test prompt")
assert "<think>" not in result
assert result == "The actual thought content."
@pytest.mark.asyncio
async def test_call_agent_strips_multiline_think_tags(tmp_path):
"""_call_agent handles multi-line <think> blocks."""
engine = _make_engine(tmp_path)
mock_agent = AsyncMock()
mock_run = AsyncMock()
mock_run.content = (
"<think>\nStep 1: analyze\nStep 2: synthesize\n</think>\n"
"Clean output here."
)
mock_agent.arun.return_value = mock_run
with patch("timmy.agent.create_timmy", return_value=mock_agent):
result = await engine._call_agent("test prompt")
assert "<think>" not in result
assert result == "Clean output here."
# ---------------------------------------------------------------------------
# _parse_facts_response resilience (#237)
# ---------------------------------------------------------------------------
def test_parse_facts_clean_json(tmp_path):
"""Direct JSON array should parse normally."""
engine = _make_engine(tmp_path)
result = engine._parse_facts_response('["fact one", "fact two"]')
assert result == ["fact one", "fact two"]
def test_parse_facts_empty_array(tmp_path):
"""Empty JSON array should return empty list."""
engine = _make_engine(tmp_path)
assert engine._parse_facts_response("[]") == []
def test_parse_facts_with_prose_prefix(tmp_path):
"""JSON array preceded by prose should still parse (#237)."""
engine = _make_engine(tmp_path)
raw = 'Here are the facts:\n["Alexander prefers YAML", "Timmy runs locally"]'
result = engine._parse_facts_response(raw)
assert result == ["Alexander prefers YAML", "Timmy runs locally"]
def test_parse_facts_with_markdown_fences(tmp_path):
"""JSON wrapped in markdown code fences should parse."""
engine = _make_engine(tmp_path)
raw = '```json\n["fact in fences"]\n```'
result = engine._parse_facts_response(raw)
assert result == ["fact in fences"]
def test_parse_facts_filters_non_strings(tmp_path):
"""Non-string entries in the array should be filtered out."""
engine = _make_engine(tmp_path)
result = engine._parse_facts_response('[42, "valid fact", null, true]')
assert result == ["valid fact"]
def test_parse_facts_none_and_empty(tmp_path):
"""None and empty input should return empty list."""
engine = _make_engine(tmp_path)
assert engine._parse_facts_response(None) == []
assert engine._parse_facts_response("") == []
assert engine._parse_facts_response(" ") == []
def test_parse_facts_invalid_json(tmp_path):
"""Totally invalid text with no JSON array should return empty list."""
engine = _make_engine(tmp_path)
assert engine._parse_facts_response("no json here at all") == []