Compare commits

...

1 Commit

Author SHA1 Message Date
Metatron
def3ed2d11 feat: add Atlas Inference Engine provider detection (closes #674)
Some checks failed
Contributor Attribution Check / check-attribution (pull_request) Failing after 32s
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 38s
Tests / e2e (pull_request) Successful in 2m39s
Tests / test (pull_request) Failing after 46m15s
Add Atlas (pure Rust+CUDA inference engine by Avarok) to the provider
registry and local server detection.

Changes:
- agent/model_metadata.py:
  - _URL_TO_PROVIDER: add localhost:8888 -> atlas mapping
  - detect_local_server_type: add Atlas detection via /health endpoint
    (checks for engine=atlas in response) and /v1/models fallback
  - Update docstring to include atlas in return type

Atlas serves an OpenAI-compatible API at localhost:8888/v1 from a 2.5 GB
Docker image; it is roughly 3x faster than vLLM and has zero Python dependencies.

Quick start:
  docker pull avarok/atlas-gb10:alpha-2.8
  docker run -d --gpus all --ipc=host -p 8888:8888 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    avarok/atlas-gb10:alpha-2.8 serve \
    Sehyo/Qwen3.5-35B-A3B-NVFP4 --speculative

Hermes config (config.yaml):
  provider: custom
  base_url: http://localhost:8888/v1
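Given that config, requests go to the standard OpenAI-style route under the base_url. A minimal sketch of the request shape (the model name is the one from the quick start above; the route and payload fields follow the OpenAI chat-completions convention Atlas advertises compatibility with — this is illustrative, not Hermes's actual request code):

```python
# Sketch only: OpenAI-style chat request aimed at the Atlas endpoint.
BASE_URL = "http://localhost:8888/v1"
url = f"{BASE_URL}/chat/completions"
payload = {
    "model": "Sehyo/Qwen3.5-35B-A3B-NVFP4",
    "messages": [{"role": "user", "content": "hello"}],
}
print(url)  # http://localhost:8888/v1/chat/completions
```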
2026-04-14 19:07:25 -04:00


@@ -233,11 +233,13 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.deepseek.com": "deepseek",
    "api.githubcopilot.com": "copilot",
    "models.github.ai": "copilot",
    "api.fireworks.ai": "fireworks",
    "opencode.ai": "opencode-go",
    "api.x.ai": "xai",
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    # Atlas Inference Engine: pure Rust+CUDA, OpenAI-compatible at :8888
    "localhost:8888": "atlas",
}
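As a sketch of how a mapping like this is typically consulted (the `host_to_provider` helper below is illustrative, not the module's actual lookup API): strip the scheme and path from the configured base_url, then key on the netloc.

```python
from typing import Dict, Optional
from urllib.parse import urlparse

# Abbreviated copy of the registry above, for illustration.
_URL_TO_PROVIDER: Dict[str, str] = {
    "api.fireworks.ai": "fireworks",
    "localhost:8888": "atlas",
}

def host_to_provider(base_url: str) -> Optional[str]:
    """Hypothetical helper: map a base_url to a provider name, or None."""
    # urlparse only populates netloc when a scheme is present.
    if "://" not in base_url:
        base_url = "http://" + base_url
    return _URL_TO_PROVIDER.get(urlparse(base_url).netloc)

print(host_to_provider("http://localhost:8888/v1"))  # atlas
```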
@@ -302,10 +304,12 @@ def is_local_endpoint(base_url: str) -> bool:
    return False
import json
def detect_local_server_type(base_url: str) -> Optional[str]:
    """Detect which local server is running at base_url by probing known endpoints.
    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", or None.
    Returns one of: "ollama", "lm-studio", "vllm", "llamacpp", "atlas", or None.
    """
    import httpx
@@ -355,6 +359,25 @@ def detect_local_server_type(base_url: str) -> Optional[str]:
                return "vllm"
        except Exception:
            pass
        # Atlas Inference Engine: OpenAI-compatible, custom /health
        # Atlas returns {"status": "ok", "engine": "atlas"} on /health
        try:
            r = client.get(f"{server_url}/health")
            if r.status_code == 200:
                try:
                    data = r.json()
                    if data.get("engine") == "atlas" or "atlas" in str(data).lower():
                        return "atlas"
                except Exception:
                    pass
            # Fallback: check /v1/models for Atlas-specific response
            r = client.get(f"{server_url}/v1/models")
            if r.status_code == 200:
                data = r.json()
                if "atlas" in json.dumps(data).lower():
                    return "atlas"
        except Exception:
            pass
    except Exception:
        pass
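The detection added in this diff keys off the parsed /health payload. Stripped of network I/O, the decision reduces to the following standalone sketch (`classify_health_payload` is a hypothetical name; it mirrors the check in `detect_local_server_type` under the assumption that Atlas answers `{"status": "ok", "engine": "atlas"}`):

```python
import json
from typing import Any, Optional

def classify_health_payload(data: Any) -> Optional[str]:
    """Hypothetical helper mirroring the Atlas check in the diff above."""
    # An explicit engine=atlas field wins; otherwise any "atlas"
    # substring in the serialized payload counts (the /v1/models fallback).
    if isinstance(data, dict) and data.get("engine") == "atlas":
        return "atlas"
    if "atlas" in json.dumps(data).lower():
        return "atlas"
    return None

print(classify_health_payload({"status": "ok", "engine": "atlas"}))  # atlas
print(classify_health_payload({"status": "ok"}))                     # None
```

The substring fallback is deliberately loose: a model listing such as `{"data": [{"id": "atlas-qwen"}]}` also classifies as Atlas.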