fix: increase token limit and improve JSON repair for truncated responses

2026-04-15 09:50:31 +00:00
parent e9d2cb5e56
commit 7ca2ebe6b5
1 changed file with 35 additions and 14 deletions
--- a/scripts/generate_scene_descriptions.py
+++ b/scripts/generate_scene_descriptions.py
@@ -133,7 +133,7 @@ def describe_image(
                    "prompt": SCENE_PROMPT,
                    "images": [image_b64],
                    "stream": False,
-                    "options": {"temperature": 0.3, "num_predict": 512}
+                    "options": {"temperature": 0.3, "num_predict": 1024}
                }).encode(),
                headers={"Content-Type": "application/json"},
            )
@@ -141,21 +141,42 @@ def describe_image(
            data = json.loads(resp.read())
            response_text = data.get("response", "")

-            # Parse JSON from response
+            # Parse JSON from response (handle both complete and truncated JSON)
            json_match = re.search(r"\{[\s\S]*\}", response_text)
-            if json_match:
-                try:
-                    parsed = json.loads(json_match.group())
-                    # Validate required fields
-                    required = ["mood", "colors", "composition", "camera", "description"]
-                    if all(k in parsed for k in required):
-                        if parsed["description"]:  # Ensure non-empty description
-                            return parsed
-                except json.JSONDecodeError:
-                    pass
+            if not json_match:
+                # Try to find opening brace for truncated JSON
+                brace_match = re.search(r"\{", response_text)
+                if brace_match:
+                    json_match = brace_match

-            # Fallback: model returned natural language — wrap it
-            # Clean up markdown formatting
+            if json_match:
+                raw_json = json_match.group() if hasattr(json_match, 'group') else response_text[json_match.start():]
+                # Try strict parse first
+                try:
+                    parsed = json.loads(raw_json)
+                    required = ["mood", "colors", "composition", "camera", "description"]
+                    if all(k in parsed for k in required) and parsed.get("description"):
+                        return parsed
+                except json.JSONDecodeError:
+                    # Attempt repair: extract fields from truncated JSON
+                    repaired = {}
+                    for field in ["mood", "colors", "composition", "camera", "description"]:
+                        pat = rf'"\s*{field}"\s*:\s*"([^"]*)"'
+                        m = re.search(pat, response_text)
+                        if m:
+                            repaired[field] = m.group(1)
+                        elif field == "colors":
+                            colors_match = re.search(r'"colors"\s*:\s*\[([^\]]*)\]', response_text)
+                            if colors_match:
+                                repaired[field] = [c.strip().strip('"') for c in colors_match.group(1).split(",") if c.strip()]
+                            else:
+                                repaired[field] = []
+                        else:
+                            repaired[field] = "unknown"
+                    if repaired.get("description") or repaired.get("mood") != "unknown":
+                        return repaired
+
+            # Final fallback: natural language response
            clean = re.sub(r"[*_`#]", "", response_text).strip()
            clean = re.sub(r"\n{3,}", "\n\n", clean)
            return {