fix: improve JSON parsing fallback and default to gemma4

This commit is contained in:
2026-04-15 09:42:27 +00:00
parent 990676fb02
commit e9d2cb5e56

View File

@@ -144,15 +144,22 @@ def describe_image(
# Parse JSON from response
json_match = re.search(r"\{[\s\S]*\}", response_text)
if json_match:
parsed = json.loads(json_match.group())
# Validate required fields
required = ["mood", "colors", "composition", "camera", "description"]
if all(k in parsed for k in required):
return parsed
try:
parsed = json.loads(json_match.group())
# Validate required fields
required = ["mood", "colors", "composition", "camera", "description"]
if all(k in parsed for k in required):
if parsed["description"]: # Ensure non-empty description
return parsed
except json.JSONDecodeError:
pass
# Fallback: return raw description
# Fallback: model returned natural language — wrap it
# Clean up markdown formatting
clean = re.sub(r"[*_`#]", "", response_text).strip()
clean = re.sub(r"\n{3,}", "\n\n", clean)
return {
"description": response_text[:500],
"description": clean[:500] if clean else response_text[:500],
"mood": "unknown",
"colors": [],
"composition": "unknown",
@@ -295,7 +302,7 @@ def main():
)
parser.add_argument("--input", "-i", required=True, help="Input directory with media files")
parser.add_argument("--output", "-o", default="training-data/scene-descriptions-auto.jsonl")
parser.add_argument("--model", "-m", default="", help="Ollama model name (auto-detects if empty)")
parser.add_argument("--model", "-m", default="gemma4:latest", help="Ollama model name (auto-detects if empty)")
parser.add_argument("--ollama-url", default="http://localhost:11434")
parser.add_argument("--limit", "-l", type=int, default=0, help="Max files to process (0=all)")
parser.add_argument("--dry-run", action="store_true", help="List files without generating")