fix: improve JSON parsing fallback and default --model to gemma4:latest

This commit is contained in:
2026-04-15 09:42:27 +00:00
parent 990676fb02
commit e9d2cb5e56

View File

@@ -144,15 +144,22 @@ def describe_image(
# Parse JSON from response # Parse JSON from response
json_match = re.search(r"\{[\s\S]*\}", response_text) json_match = re.search(r"\{[\s\S]*\}", response_text)
if json_match: if json_match:
parsed = json.loads(json_match.group()) try:
# Validate required fields parsed = json.loads(json_match.group())
required = ["mood", "colors", "composition", "camera", "description"] # Validate required fields
if all(k in parsed for k in required): required = ["mood", "colors", "composition", "camera", "description"]
return parsed if all(k in parsed for k in required):
if parsed["description"]: # Ensure non-empty description
return parsed
except json.JSONDecodeError:
pass
# Fallback: return raw description # Fallback: model returned natural language — wrap it
# Clean up markdown formatting
clean = re.sub(r"[*_`#]", "", response_text).strip()
clean = re.sub(r"\n{3,}", "\n\n", clean)
return { return {
"description": response_text[:500], "description": clean[:500] if clean else response_text[:500],
"mood": "unknown", "mood": "unknown",
"colors": [], "colors": [],
"composition": "unknown", "composition": "unknown",
@@ -295,7 +302,7 @@ def main():
) )
parser.add_argument("--input", "-i", required=True, help="Input directory with media files") parser.add_argument("--input", "-i", required=True, help="Input directory with media files")
parser.add_argument("--output", "-o", default="training-data/scene-descriptions-auto.jsonl") parser.add_argument("--output", "-o", default="training-data/scene-descriptions-auto.jsonl")
parser.add_argument("--model", "-m", default="", help="Ollama model name (auto-detects if empty)") parser.add_argument("--model", "-m", default="gemma4:latest", help="Ollama model name (auto-detects if empty)")
parser.add_argument("--ollama-url", default="http://localhost:11434") parser.add_argument("--ollama-url", default="http://localhost:11434")
parser.add_argument("--limit", "-l", type=int, default=0, help="Max files to process (0=all)") parser.add_argument("--limit", "-l", type=int, default=0, help="Max files to process (0=all)")
parser.add_argument("--dry-run", action="store_true", help="List files without generating") parser.add_argument("--dry-run", action="store_true", help="List files without generating")