From 7ca2ebe6b5d2697a8a8bde484d35c3adeeaa6965 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Wed, 15 Apr 2026 09:50:31 +0000 Subject: [PATCH] fix: increase token limit and improve JSON repair for truncated responses --- scripts/generate_scene_descriptions.py | 49 ++++++++++++++++++-------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/scripts/generate_scene_descriptions.py b/scripts/generate_scene_descriptions.py index d0081917..ca5c0087 100644 --- a/scripts/generate_scene_descriptions.py +++ b/scripts/generate_scene_descriptions.py @@ -133,7 +133,7 @@ def describe_image( "prompt": SCENE_PROMPT, "images": [image_b64], "stream": False, - "options": {"temperature": 0.3, "num_predict": 512} + "options": {"temperature": 0.3, "num_predict": 1024} }).encode(), headers={"Content-Type": "application/json"}, ) @@ -141,21 +141,42 @@ def describe_image( data = json.loads(resp.read()) response_text = data.get("response", "") - # Parse JSON from response + # Parse JSON from response (handle both complete and truncated JSON) json_match = re.search(r"\{[\s\S]*\}", response_text) - if json_match: - try: - parsed = json.loads(json_match.group()) - # Validate required fields - required = ["mood", "colors", "composition", "camera", "description"] - if all(k in parsed for k in required): - if parsed["description"]: # Ensure non-empty description - return parsed - except json.JSONDecodeError: - pass + if not json_match: + # Try to find opening brace for truncated JSON + brace_match = re.search(r"\{", response_text) + if brace_match: + json_match = brace_match - # Fallback: model returned natural language — wrap it - # Clean up markdown formatting + if json_match: + raw_json = json_match.group() if hasattr(json_match, 'group') else response_text[json_match.start():] + # Try strict parse first + try: + parsed = json.loads(raw_json) + required = ["mood", "colors", "composition", "camera", "description"] + if all(k in parsed for k in required) and parsed.get("description"): + return parsed + except json.JSONDecodeError: + # Attempt repair: extract fields from truncated JSON + repaired = {} + for field in ["mood", "colors", "composition", "camera", "description"]: + pat = rf'"\s*{field}"\s*:\s*"([^"]*)"' + m = re.search(pat, response_text) + if m: + repaired[field] = m.group(1) + elif field == "colors": + colors_match = re.search(r'"colors"\s*:\s*\[([^\]]*)\]', response_text) + if colors_match: + repaired[field] = [c.strip().strip('"') for c in colors_match.group(1).split(",") if c.strip()] + else: + repaired[field] = [] + else: + repaired[field] = "unknown" + if repaired.get("description") or repaired.get("mood") != "unknown": + return repaired + + # Final fallback: natural language response clean = re.sub(r"[*_`#]", "", response_text).strip() clean = re.sub(r"\n{3,}", "\n\n", clean) return {