forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
@@ -19,6 +19,7 @@ Usage::
|
||||
|
||||
import logging
|
||||
import random
|
||||
import re
|
||||
import sqlite3
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
@@ -35,6 +36,9 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
_DEFAULT_DB = Path("data/thoughts.db")
|
||||
|
||||
# qwen3 and other reasoning models wrap chain-of-thought in <think> tags
|
||||
_THINK_TAG_RE = re.compile(r"<think>.*?</think>\s*", re.DOTALL)
|
||||
|
||||
# Sensitive patterns that must never be stored as facts
|
||||
_SENSITIVE_PATTERNS = [
|
||||
"token",
|
||||
@@ -435,6 +439,9 @@ class ThinkingEngine:
|
||||
def _parse_facts_response(self, raw: str) -> list[str]:
|
||||
"""Parse JSON array from LLM response, stripping markdown fences.
|
||||
|
||||
Resilient to models that prepend reasoning text or wrap the array in
|
||||
prose. Finds the first ``[...]`` block and parses that.
|
||||
|
||||
Args:
|
||||
raw: Raw response string from the LLM.
|
||||
|
||||
@@ -447,14 +454,39 @@ class ThinkingEngine:
|
||||
import json
|
||||
|
||||
cleaned = raw.strip()
|
||||
|
||||
# Strip markdown code fences
|
||||
if cleaned.startswith("```"):
|
||||
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
|
||||
|
||||
facts = json.loads(cleaned)
|
||||
if not isinstance(facts, list):
|
||||
return []
|
||||
# Try direct parse first (fast path)
|
||||
try:
|
||||
facts = json.loads(cleaned)
|
||||
if isinstance(facts, list):
|
||||
return [f for f in facts if isinstance(f, str)]
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
|
||||
return [f for f in facts if isinstance(f, str)]
|
||||
# Fallback: extract first JSON array from the text
|
||||
start = cleaned.find("[")
|
||||
if start == -1:
|
||||
return []
|
||||
# Walk to find the matching close bracket
|
||||
depth = 0
|
||||
for i, ch in enumerate(cleaned[start:], start):
|
||||
if ch == "[":
|
||||
depth += 1
|
||||
elif ch == "]":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
try:
|
||||
facts = json.loads(cleaned[start : i + 1])
|
||||
if isinstance(facts, list):
|
||||
return [f for f in facts if isinstance(f, str)]
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
break
|
||||
return []
|
||||
|
||||
def _filter_and_store_facts(self, facts: list[str]) -> None:
|
||||
"""Filter and store valid facts, blocking sensitive and meta content.
|
||||
@@ -498,7 +530,7 @@ class ThinkingEngine:
|
||||
if facts := self._parse_facts_response(raw):
|
||||
self._filter_and_store_facts(facts)
|
||||
except Exception as exc:
|
||||
logger.debug("Thought distillation skipped: %s", exc)
|
||||
logger.warning("Thought distillation failed: %s", exc)
|
||||
|
||||
async def _maybe_file_issues(self) -> None:
|
||||
"""Every N thoughts, classify recent thoughts and file Gitea issues.
|
||||
@@ -945,12 +977,16 @@ class ThinkingEngine:
|
||||
errors that occur when MCP stdio transports are spawned inside asyncio
|
||||
background tasks (#72). The thinking engine doesn't need Gitea or
|
||||
filesystem tools — it only needs the LLM.
|
||||
|
||||
Strips ``<think>`` tags from reasoning models (qwen3, etc.) so that
|
||||
downstream parsers (fact distillation, issue filing) receive clean text.
|
||||
"""
|
||||
from timmy.agent import create_timmy
|
||||
|
||||
agent = create_timmy(skip_mcp=True)
|
||||
run = await agent.arun(prompt, stream=False)
|
||||
return run.content if hasattr(run, "content") else str(run)
|
||||
raw = run.content if hasattr(run, "content") else str(run)
|
||||
return _THINK_TAG_RE.sub("", raw) if raw else raw
|
||||
|
||||
def _store_thought(self, content: str, seed_type: str) -> Thought:
|
||||
"""Persist a thought to SQLite."""
|
||||
|
||||
Reference in New Issue
Block a user