Merge PR #200: fix extract_images and truncate_message bugs in platform base
Authored by 0xbyt4. Two fixes:
- extract_images(): only remove the image tags that were actually extracted, not all markdown image tags. Previously, a markdown image tag whose URL was not extracted as an image was silently dropped from the text whenever real images were also present.
- truncate_message(): walk chunk_body, not full_chunk, when tracking code block state, so that the reopened fence prefix doesn't toggle in_code off and leave continuation chunks with unclosed code blocks.
This commit is contained in:
@@ -482,10 +482,14 @@ class BasePlatformAdapter(ABC):
|
||||
url = match.group(1)
|
||||
images.append((url, ""))
|
||||
|
||||
# Remove matched image tags from content if we found images
|
||||
# Remove only the matched image tags from content (not all markdown images)
|
||||
if images:
|
||||
cleaned = re.sub(md_pattern, '', cleaned)
|
||||
cleaned = re.sub(html_pattern, '', cleaned)
|
||||
extracted_urls = {url for url, _ in images}
|
||||
def _remove_if_extracted(match):
|
||||
url = match.group(2) if match.lastindex >= 2 else match.group(1)
|
||||
return '' if url in extracted_urls else match.group(0)
|
||||
cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
|
||||
cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
|
||||
# Clean up leftover blank lines
|
||||
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
|
||||
|
||||
@@ -833,11 +837,11 @@ class BasePlatformAdapter(ABC):
|
||||
|
||||
full_chunk = prefix + chunk_body
|
||||
|
||||
# Walk the chunk line-by-line to determine whether we end
|
||||
# inside an open code block.
|
||||
# Walk only the chunk_body (not the prefix we prepended) to
|
||||
# determine whether we end inside an open code block.
|
||||
in_code = carry_lang is not None
|
||||
lang = carry_lang or ""
|
||||
for line in full_chunk.split("\n"):
|
||||
for line in chunk_body.split("\n"):
|
||||
stripped = line.strip()
|
||||
if stripped.startswith("```"):
|
||||
if in_code:
|
||||
|
||||
Reference in New Issue
Block a user