Merge PR #200: fix extract_images and truncate_message bugs in platform base

Authored by 0xbyt4.

Two fixes:
- extract_images(): only remove extracted image tags, not all markdown image
  tags. Previously ![doc](report.pdf) was silently dropped when real images
  were also present.
- truncate_message(): walk chunk_body, not full_chunk, when tracking code block
  state. Otherwise the reopened fence prefix toggles in_code off, leaving
  continuation chunks with unclosed code blocks.
This commit is contained in:
teknium1
2026-03-04 19:37:58 -08:00
2 changed files with 357 additions and 6 deletions

View File

@@ -482,10 +482,14 @@ class BasePlatformAdapter(ABC):
url = match.group(1)
images.append((url, ""))
# Remove matched image tags from content if we found images
# Remove only the matched image tags from content (not all markdown images)
if images:
cleaned = re.sub(md_pattern, '', cleaned)
cleaned = re.sub(html_pattern, '', cleaned)
extracted_urls = {url for url, _ in images}
def _remove_if_extracted(match):
url = match.group(2) if match.lastindex >= 2 else match.group(1)
return '' if url in extracted_urls else match.group(0)
cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
# Clean up leftover blank lines
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
@@ -833,11 +837,11 @@ class BasePlatformAdapter(ABC):
full_chunk = prefix + chunk_body
# Walk the chunk line-by-line to determine whether we end
# inside an open code block.
# Walk only the chunk_body (not the prefix we prepended) to
# determine whether we end inside an open code block.
in_code = carry_lang is not None
lang = carry_lang or ""
for line in full_chunk.split("\n"):
for line in chunk_body.split("\n"):
stripped = line.strip()
if stripped.startswith("```"):
if in_code: