[claude] Add Hermes 4 14B Modelfile, providers config, and smoke test (#1101) (#1110)

2026-03-23 17:59:45 +00:00
parent 3c6a1659d2
commit 19dbdec314
3 changed files with 415 additions and 1 deletion
--- a/config/providers.yaml
+++ b/config/providers.yaml
@@ -54,6 +54,22 @@ providers:
        context_window: 2048
        capabilities: [text, vision, streaming]

+      # AutoLoRA base: Hermes 4 14B — native tool calling, hybrid reasoning, structured JSON
+      # Import via: ollama create hermes4-14b -f Modelfile.hermes4-14b
+      # See Modelfile.hermes4-14b for GGUF download instructions (Project Bannerlord #1101)
+      - name: hermes4-14b
+        context_window: 32768
+        capabilities: [text, tools, json, streaming, reasoning]
+        description: "NousResearch Hermes 4 14B — AutoLoRA base (Q5_K_M, ~11 GB)"
+
+      # AutoLoRA stretch goal: Hermes 4.3 Seed 36B (~21 GB Q4_K_M)
+      # Use a reduced context window (8K) to fit on a 36 GB M3 Max alongside OS/app overhead
+      # Import via: ollama create hermes4-36b -f Modelfile.hermes4-36b (Modelfile TBD)
+      - name: hermes4-36b
+        context_window: 8192
+        capabilities: [text, tools, json, streaming, reasoning]
+        description: "NousResearch Hermes 4.3 Seed 36B — stretch goal (Q4_K_M, ~21 GB)"
+
      # Creative writing fallback (Dolphin 3.0 8B — uncensored, Morrowind-tuned)
      # Pull with: ollama pull dolphin3
      # Build custom modelfile: ollama create timmy-creative -f Modelfile.timmy-creative
@@ -136,7 +152,8 @@ fallback_chains:
  
  # Tool-calling models (for function calling)
  tools:
-    - llama3.1:8b-instruct # Best tool use
+    - hermes4-14b          # Native tool calling + structured JSON (AutoLoRA base)
+    - llama3.1:8b-instruct # Former first choice; reliable tool use
    - qwen2.5:7b           # Reliable tools
    - llama3.2:3b          # Small but capable