diff --git a/scripts/generate_scene_descriptions.py b/scripts/generate_scene_descriptions.py index 56ce86aa..d0081917 100644 --- a/scripts/generate_scene_descriptions.py +++ b/scripts/generate_scene_descriptions.py @@ -144,15 +144,22 @@ def describe_image( # Parse JSON from response json_match = re.search(r"\{[\s\S]*\}", response_text) if json_match: - parsed = json.loads(json_match.group()) - # Validate required fields - required = ["mood", "colors", "composition", "camera", "description"] - if all(k in parsed for k in required): - return parsed + try: + parsed = json.loads(json_match.group()) + # Validate required fields + required = ["mood", "colors", "composition", "camera", "description"] + if all(k in parsed for k in required): + if parsed["description"]: # Ensure non-empty description + return parsed + except json.JSONDecodeError: + pass - # Fallback: return raw description + # Fallback: model returned natural language — wrap it + # Clean up markdown formatting + clean = re.sub(r"[*_`#]", "", response_text).strip() + clean = re.sub(r"\n{3,}", "\n\n", clean) return { - "description": response_text[:500], + "description": clean[:500] if clean else response_text[:500], "mood": "unknown", "colors": [], "composition": "unknown", @@ -295,7 +302,7 @@ def main(): ) parser.add_argument("--input", "-i", required=True, help="Input directory with media files") parser.add_argument("--output", "-o", default="training-data/scene-descriptions-auto.jsonl") - parser.add_argument("--model", "-m", default="", help="Ollama model name (auto-detects if empty)") + parser.add_argument("--model", "-m", default="gemma4:latest", help="Ollama model name (auto-detects if empty)") parser.add_argument("--ollama-url", default="http://localhost:11434") parser.add_argument("--limit", "-l", type=int, default=0, help="Max files to process (0=all)") parser.add_argument("--dry-run", action="store_true", help="List files without generating")