fix: vendor vision benchmark fixtures (#868)

2026-04-22 11:37:04 -04:00
30 changed files with 332 additions and 227 deletions
--- a/benchmarks/test_images.json
+++ b/benchmarks/test_images.json
@@ -1,194 +1,354 @@
 [
  {
    "id": "screenshot_github_home",
-    "url": "https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png",
+    "url": "test_images/screenshot_github_home.png",
    "category": "screenshot",
-    "expected_keywords": ["github", "logo", "mark"],
+    "expected_keywords": [
+      "github",
+      "logo",
+      "mark"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "diagram_mermaid_flow",
-    "url": "https://mermaid.ink/img/pako:eNpdkE9PwzAMxb-K5VOl7gc7sAOIIDuAw9gptnRaSJLSJttQStmXs9LCH-ymBOI1ef_42U6cUSae4IkDxbAAWtB6siSZXVhjQTlgl1nigHg5fRBOzSfebopROCu_cytObSfgLSE1ANOeZWkO2IH5upZxYot8m1hqAdpD_63WRl0xdUG1jdl9kPiOb_EWk2JBtPaiKkF4eVIYgO0EtkW-RSgC4gJ6HJYRG1UNdN0HNVd0Bftjj7X8P92qPj-F8l8T3w",
+    "url": "test_images/diagram_mermaid_flow.png",
    "category": "diagram",
-    "expected_keywords": ["flow", "diagram", "process"],
+    "expected_keywords": [
+      "flow",
+      "diagram",
+      "process"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_1",
-    "url": "https://picsum.photos/seed/vision1/400/300",
+    "url": "test_images/photo_random_1.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_2",
-    "url": "https://picsum.photos/seed/vision2/400/300",
+    "url": "test_images/photo_random_2.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "chart_simple_bar",
-    "url": "https://quickchart.io/chart?c={type:'bar',data:{labels:['Q1','Q2','Q3','Q4'],datasets:[{label:'Revenue',data:[100,150,200,250]}]}}",
+    "url": "test_images/chart_simple_bar.png",
    "category": "chart",
-    "expected_keywords": ["bar", "chart", "revenue"],
+    "expected_keywords": [
+      "bar",
+      "chart",
+      "revenue"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "chart_pie",
-    "url": "https://quickchart.io/chart?c={type:'pie',data:{labels:['A','B','C'],datasets:[{data:[30,50,20]}]}}",
+    "url": "test_images/chart_pie.png",
    "category": "chart",
-    "expected_keywords": ["pie", "chart", "percentage"],
+    "expected_keywords": [
+      "pie",
+      "chart",
+      "percentage"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "diagram_org_chart",
-    "url": "https://mermaid.ink/img/pako:eNpdkE9PwzAMxb-K5VOl7gc7sAOIIDuAw9gptnRaSJLSJttQStmXs9LCH-ymBOI1ef_42U6cUSae4IkDxbAAWtB6iuyIWyrLgXLALrPEAfFy-iCcmk-83RSjcFZ-51ac2k7AW0JqAKY9y9IcsAPzdS3jxBb5NrHUAraH_lutjbpi6oJqG7P7IPEd3-ItJsWCaO1FVYLw8qQwANsJbIt8i1AExAX0OCwjNqoa6LoPaq7oCvbHHmv5f7pVfX4K5b8mvg",
+    "url": "test_images/diagram_org_chart.png",
    "category": "diagram",
-    "expected_keywords": ["organization", "hierarchy", "chart"],
+    "expected_keywords": [
+      "organization",
+      "hierarchy",
+      "chart"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": false
+    }
  },
  {
    "id": "screenshot_terminal",
-    "url": "https://raw.githubusercontent.com/nicehash/nicehash-quick-start/main/images/nicehash-terminal.png",
+    "url": "test_images/screenshot_terminal.png",
    "category": "screenshot",
-    "expected_keywords": ["terminal", "command", "output"],
+    "expected_keywords": [
+      "terminal",
+      "command",
+      "output"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_3",
-    "url": "https://picsum.photos/seed/vision3/400/300",
+    "url": "test_images/photo_random_3.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "chart_line",
-    "url": "https://quickchart.io/chart?c={type:'line',data:{labels:['Jan','Feb','Mar','Apr'],datasets:[{label:'Temperature',data:[5,8,12,18]}]}}",
+    "url": "test_images/chart_line.png",
    "category": "chart",
-    "expected_keywords": ["line", "chart", "temperature"],
+    "expected_keywords": [
+      "line",
+      "chart",
+      "temperature"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "diagram_sequence",
-    "url": "https://mermaid.ink/img/pako:eNpdkE9PwzAMxb-K5VOl7gc7sAOIIDuAw9gptnRaSJLSJttQStmXs9LCH-ymBOI1ef_42U6cUSae4IkDxbAAWtB6iuyIWyrLgXLALrPEAfFy-iCcmk-83RSjcFZ-51ac2k7AW0JqAKY9y9IcsAPzdS3jxBb5NrHUAraH_lutjbpi6oJqG7P7IPEd3-ItJsWCaO1FVYLw8qQwANsJbIt8i1AExAX0OCwjNqoa6LoPaq7oCvbHHmv5f7pVfX4K5b8mvg",
+    "url": "test_images/diagram_sequence.png",
    "category": "diagram",
-    "expected_keywords": ["sequence", "interaction", "message"],
+    "expected_keywords": [
+      "sequence",
+      "interaction",
+      "message"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_4",
-    "url": "https://picsum.photos/seed/vision4/400/300",
+    "url": "test_images/photo_random_4.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "screenshot_webpage",
-    "url": "https://github.githubassets.com/images/modules/site/social-cards.png",
+    "url": "test_images/screenshot_webpage.png",
    "category": "screenshot",
-    "expected_keywords": ["github", "page", "web"],
+    "expected_keywords": [
+      "github",
+      "page",
+      "web"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "chart_radar",
-    "url": "https://quickchart.io/chart?c={type:'radar',data:{labels:['Speed','Power','Defense','Magic'],datasets:[{label:'Hero',data:[80,60,70,90]}]}}",
+    "url": "test_images/chart_radar.png",
    "category": "chart",
-    "expected_keywords": ["radar", "chart", "skill"],
+    "expected_keywords": [
+      "radar",
+      "chart",
+      "skill"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "photo_random_5",
-    "url": "https://picsum.photos/seed/vision5/400/300",
+    "url": "test_images/photo_random_5.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "diagram_class",
-    "url": "https://mermaid.ink/img/pako:eNpdkE9PwzAMxb-K5VOl7gc7sAOIIDuAw9gptnRaSJLSJttQStmXs9LCH-ymBOI1ef_42U6cUSae4IkDxbAAWtB6iuyIWyrLgXLALrPEAfFy-iCcmk-83RSjcFZ-51ac2k7AW0JqAKY9y9IcsAPzdS3jxBb5NrHUAraH_lutjbpi6oJqG7P7IPEd3-ItJsWCaO1FVYLw8qQwANsJbIt8i1AExAX0OCwjNqoa6LoPaq7oCvbHHmv5f7pVfX4K5b8mvg",
+    "url": "test_images/diagram_class.png",
    "category": "diagram",
-    "expected_keywords": ["class", "object", "attribute"],
+    "expected_keywords": [
+      "class",
+      "object",
+      "attribute"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": false
+    }
  },
  {
    "id": "chart_doughnut",
-    "url": "https://quickchart.io/chart?c={type:'doughnut',data:{labels:['Desktop','Mobile','Tablet'],datasets:[{data:[60,30,10]}]}}",
+    "url": "test_images/chart_doughnut.png",
    "category": "chart",
-    "expected_keywords": ["doughnut", "chart", "device"],
+    "expected_keywords": [
+      "doughnut",
+      "chart",
+      "device"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "photo_random_6",
-    "url": "https://picsum.photos/seed/vision6/400/300",
+    "url": "test_images/photo_random_6.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "screenshot_error",
-    "url": "https://http.cat/404.jpg",
+    "url": "test_images/screenshot_error.png",
    "category": "screenshot",
-    "expected_keywords": ["404", "error", "cat"],
+    "expected_keywords": [
+      "404",
+      "error",
+      "cat"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": true
+    }
  },
  {
    "id": "diagram_network",
-    "url": "https://mermaid.ink/img/pako:eNpdkE9PwzAMxb-K5VOl7gc7sAOIIDuAw9gptnRaSJLSJttQStmXs9LCH-ymBOI1ef_42U6cUSae4IkDxbAAWtB6iuyIWyrLgXLALrPEAfFy-iCcmk-83RSjcFZ-51ac2k7AW0JqAKY9y9IcsAPzdS3jxBb5NrHUAraH_lutjbpi6oJqG7P7IPEd3-ItJsWCaO1FVYLw8qQwANsJbIt8i1AExAX0OCwjNqoa6LoPaq7oCvbHHmv5f7pVfX4K5b8mvg",
+    "url": "test_images/diagram_network.png",
    "category": "diagram",
-    "expected_keywords": ["network", "node", "connection"],
+    "expected_keywords": [
+      "network",
+      "node",
+      "connection"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_7",
-    "url": "https://picsum.photos/seed/vision7/400/300",
+    "url": "test_images/photo_random_7.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "chart_stacked_bar",
-    "url": "https://quickchart.io/chart?c={type:'bar',data:{labels:['2022','2023','2024'],datasets:[{label:'Cloud',data:[100,150,200]},{label:'On-prem',data:[200,180,160]}]},options:{scales:{x:{stacked:true},y:{stacked:true}}}}",
+    "url": "test_images/chart_stacked_bar.png",
    "category": "chart",
-    "expected_keywords": ["stacked", "bar", "chart"],
+    "expected_keywords": [
+      "stacked",
+      "bar",
+      "chart"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 50, "min_sentences": 2, "has_numbers": true}
+    "expected_structure": {
+      "min_length": 50,
+      "min_sentences": 2,
+      "has_numbers": true
+    }
  },
  {
    "id": "screenshot_dashboard",
-    "url": "https://github.githubassets.com/images/modules/site/features-code-search.png",
+    "url": "test_images/screenshot_dashboard.png",
    "category": "screenshot",
-    "expected_keywords": ["search", "code", "feature"],
+    "expected_keywords": [
+      "search",
+      "code",
+      "feature"
+    ],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  },
  {
    "id": "photo_random_8",
-    "url": "https://picsum.photos/seed/vision8/400/300",
+    "url": "test_images/photo_random_8.png",
    "category": "photo",
    "expected_keywords": [],
    "ground_truth_ocr": "",
-    "expected_structure": {"min_length": 30, "min_sentences": 1, "has_numbers": false}
+    "expected_structure": {
+      "min_length": 30,
+      "min_sentences": 1,
+      "has_numbers": false
+    }
  }
 ]
--- a/benchmarks/test_images/chart_doughnut.png
+++ b/benchmarks/test_images/chart_doughnut.png
--- a/benchmarks/test_images/chart_line.png
+++ b/benchmarks/test_images/chart_line.png
--- a/benchmarks/test_images/chart_pie.png
+++ b/benchmarks/test_images/chart_pie.png
--- a/benchmarks/test_images/chart_radar.png
+++ b/benchmarks/test_images/chart_radar.png
--- a/benchmarks/test_images/chart_simple_bar.png
+++ b/benchmarks/test_images/chart_simple_bar.png
--- a/benchmarks/test_images/chart_stacked_bar.png
+++ b/benchmarks/test_images/chart_stacked_bar.png
--- a/benchmarks/test_images/diagram_class.png
+++ b/benchmarks/test_images/diagram_class.png
--- a/benchmarks/test_images/diagram_mermaid_flow.png
+++ b/benchmarks/test_images/diagram_mermaid_flow.png
--- a/benchmarks/test_images/diagram_network.png
+++ b/benchmarks/test_images/diagram_network.png
--- a/benchmarks/test_images/diagram_org_chart.png
+++ b/benchmarks/test_images/diagram_org_chart.png
--- a/benchmarks/test_images/diagram_sequence.png
+++ b/benchmarks/test_images/diagram_sequence.png
--- a/benchmarks/test_images/photo_random_1.png
+++ b/benchmarks/test_images/photo_random_1.png
--- a/benchmarks/test_images/photo_random_2.png
+++ b/benchmarks/test_images/photo_random_2.png
--- a/benchmarks/test_images/photo_random_3.png
+++ b/benchmarks/test_images/photo_random_3.png
--- a/benchmarks/test_images/photo_random_4.png
+++ b/benchmarks/test_images/photo_random_4.png
--- a/benchmarks/test_images/photo_random_5.png
+++ b/benchmarks/test_images/photo_random_5.png
--- a/benchmarks/test_images/photo_random_6.png
+++ b/benchmarks/test_images/photo_random_6.png
--- a/benchmarks/test_images/photo_random_7.png
+++ b/benchmarks/test_images/photo_random_7.png
--- a/benchmarks/test_images/photo_random_8.png
+++ b/benchmarks/test_images/photo_random_8.png
--- a/benchmarks/test_images/screenshot_dashboard.png
+++ b/benchmarks/test_images/screenshot_dashboard.png
--- a/benchmarks/test_images/screenshot_error.png
+++ b/benchmarks/test_images/screenshot_error.png
--- a/benchmarks/test_images/screenshot_github_home.png
+++ b/benchmarks/test_images/screenshot_github_home.png
--- a/benchmarks/test_images/screenshot_terminal.png
+++ b/benchmarks/test_images/screenshot_terminal.png
--- a/benchmarks/test_images/screenshot_webpage.png
+++ b/benchmarks/test_images/screenshot_webpage.png
--- a/benchmarks/vision_benchmark.py
+++ b/benchmarks/vision_benchmark.py
@@ -11,17 +11,19 @@ Usage:

    # Single image test
    python benchmarks/vision_benchmark.py --url https://example.com/image.png
+    python benchmarks/vision_benchmark.py --url benchmarks/test_images/photo_random_1.png

    # Generate test report
    python benchmarks/vision_benchmark.py --images benchmarks/test_images.json --output benchmarks/vision_results.json

-Test image dataset: benchmarks/test_images.json (50-100 diverse images)
+Test image dataset: benchmarks/test_images.json (committed local fixtures under benchmarks/test_images/)
 """

 import argparse
 import asyncio
 import base64
 import json
+import mimetypes
 import os
 import statistics
 import sys
@@ -67,6 +69,28 @@ EVAL_PROMPTS = {
 # ---------------------------------------------------------------------------


+def _is_remote_image_source(image_source: str) -> bool:
+    return image_source.startswith(("http://", "https://", "data:", "file://"))
+
+
+def _image_source_to_payload_url(image_source: str) -> str:
+    """Convert local image paths into data URLs; keep remote URLs unchanged."""
+    if image_source.startswith(("http://", "https://", "data:")):
+        return image_source
+
+    resolved = image_source[len("file://"):] if image_source.startswith("file://") else image_source
+    local_path = Path(os.path.expanduser(resolved)).resolve()
+    if not local_path.is_file():
+        return image_source
+
+    mime_type, _ = mimetypes.guess_type(str(local_path))
+    if not mime_type:
+        mime_type = "application/octet-stream"
+
+    encoded = base64.b64encode(local_path.read_bytes()).decode("ascii")
+    return f"data:{mime_type};base64,{encoded}"
+
+
 async def analyze_with_model(
    image_url: str,
    prompt: str,
@@ -84,6 +108,8 @@ async def analyze_with_model(
    """
    import httpx

+    image_payload_url = _image_source_to_payload_url(image_url)
+
    provider = model_config["provider"]
    model_id = model_config["model_id"]

@@ -93,7 +119,7 @@ async def analyze_with_model(
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": image_url}},
+                {"type": "image_url", "image_url": {"url": image_payload_url}},
            ],
        }
    ]
@@ -570,8 +596,18 @@ def generate_sample_dataset() -> List[dict]:

 def load_dataset(path: str) -> List[dict]:
    """Load test dataset from JSON file."""
-    with open(path) as f:
-        return json.load(f)
+    dataset_path = Path(path).resolve()
+    with open(dataset_path) as f:
+        dataset = json.load(f)
+
+    base_dir = dataset_path.parent
+    for image in dataset:
+        image_url = image.get("url")
+        if not image_url or _is_remote_image_source(image_url):
+            continue
+        image["url"] = str((base_dir / image_url).resolve())
+
+    return dataset


 # ---------------------------------------------------------------------------
@@ -582,7 +618,7 @@ def load_dataset(path: str) -> List[dict]:
 async def main():
    parser = argparse.ArgumentParser(description="Vision Benchmark Suite (Issue #817)")
    parser.add_argument("--images", help="Path to test images JSON file")
-    parser.add_argument("--url", help="Single image URL to test")
+    parser.add_argument("--url", help="Single image URL or local file path to test")
    parser.add_argument("--category", default="photo", help="Category for single URL")
    parser.add_argument("--output", default=None, help="Output JSON file")
    parser.add_argument("--runs", type=int, default=1, help="Runs per model per image")
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@@ -211,43 +211,6 @@ class PluginContext:
        }
        logger.debug("Plugin %s registered CLI command: %s", self.manifest.name, name)

-    # -- memory provider registration ----------------------------------------
-
-    def register_memory_provider(self, provider) -> None:
-        """Register a memory provider supplied by this plugin.
-
-        The provider must be an instance of ``agent.memory_provider.MemoryProvider``.
-        Only one plugin-registered memory provider is accepted; a second
-        attempt is rejected with a warning.
-
-        The registered provider is retrievable via
-        ``get_plugin_memory_provider()`` and is picked up by ``run_agent.py``
-        when ``memory.provider`` in *config.yaml* matches the provider's
-        ``name`` property.
-        """
-        from agent.memory_provider import MemoryProvider
-
-        if not isinstance(provider, MemoryProvider):
-            logger.warning(
-                "Plugin '%s' tried to register a memory provider that does not "
-                "inherit from MemoryProvider. Ignoring.",
-                self.manifest.name,
-            )
-            return
-        if self._manager._plugin_memory_provider is not None:
-            logger.warning(
-                "Plugin '%s' tried to register a memory provider, but one is "
-                "already registered by another plugin. Only one plugin-supplied "
-                "memory provider is allowed at a time.",
-                self.manifest.name,
-            )
-            return
-        self._manager._plugin_memory_provider = provider
-        logger.info(
-            "Plugin '%s' registered memory provider: %s",
-            self.manifest.name, provider.name,
-        )
-
    # -- context engine registration -----------------------------------------

    def register_context_engine(self, engine) -> None:
@@ -360,7 +323,6 @@ class PluginManager:
        self._plugin_tool_names: Set[str] = set()
        self._cli_commands: Dict[str, dict] = {}
        self._context_engine = None  # Set by a plugin via register_context_engine()
-        self._plugin_memory_provider = None  # Set by a plugin via register_memory_provider()
        self._discovered: bool = False
        self._cli_ref = None  # Set by CLI after plugin discovery
        # Plugin skill registry: qualified name → metadata dict.
@@ -737,11 +699,6 @@ def get_plugin_context_engine():
    return get_plugin_manager()._context_engine


-def get_plugin_memory_provider():
-    """Return the plugin-registered memory provider, or None."""
-    return get_plugin_manager()._plugin_memory_provider
-
-
 def get_plugin_toolsets() -> List[tuple]:
    """Return plugin toolsets as ``(key, label, description)`` tuples.

--- a/run_agent.py
+++ b/run_agent.py
@@ -1193,18 +1193,6 @@ class AIAgent:
                    from plugins.memory import load_memory_provider as _load_mem
                    self._memory_manager = _MemoryManager()
                    _mp = _load_mem(_mem_provider_name)
-                    # Fall back to a user plugin that called register_memory_provider()
-                    if _mp is None:
-                        try:
-                            from hermes_cli.plugins import get_plugin_memory_provider as _gpm
-                            _candidate = _gpm()
-                            if _candidate and _candidate.name == _mem_provider_name:
-                                _mp = _candidate
-                        except Exception as _gpm_err:
-                            logger.debug(
-                                "get_plugin_memory_provider() failed during fallback lookup: %s",
-                                _gpm_err,
-                            )
                    if _mp and _mp.is_available():
                        self._memory_manager.add_provider(_mp)
                    if self._memory_manager.providers:
--- a/tests/hermes_cli/test_plugins.py
+++ b/tests/hermes_cli/test_plugins.py
@@ -19,7 +19,6 @@ from hermes_cli.plugins import (
    PluginManifest,
    get_plugin_manager,
    get_pre_tool_call_block_message,
-    get_plugin_memory_provider,
    discover_plugins,
    invoke_hook,
 )
@@ -610,105 +609,3 @@ class TestPreLlmCallTargetRouting:
 # in PluginContext (hermes_cli/plugins.py).  The tests referenced _plugin_commands,
 # commands_registered, get_plugin_command_handler, and GATEWAY_KNOWN_COMMANDS
 # integration — all of which are unimplemented features.
-
-
-# ── TestRegisterMemoryProvider ─────────────────────────────────────────────
-
-
-class TestRegisterMemoryProvider:
-    """Regression tests for PluginContext.register_memory_provider() — issue #990.
-
-    The MemPalace plugin (and any user plugin following the developer guide)
-    calls ``ctx.register_memory_provider(provider)`` inside ``register(ctx)``.
-    Before the fix, PluginContext had no such method and the plugin failed to
-    load with: 'PluginContext' object has no attribute 'register_memory_provider'.
-    """
-
-    def _make_memory_plugin(self, plugins_dir: "Path", name: str) -> None:
-        """Write a minimal user plugin that registers a stub MemoryProvider."""
-        from agent.memory_provider import MemoryProvider
-
-        plugin_dir = plugins_dir / name
-        plugin_dir.mkdir(parents=True, exist_ok=True)
-        (plugin_dir / "plugin.yaml").write_text(
-            f"name: {name}\nversion: 0.1.0\ndescription: Stub memory plugin\n"
-        )
-        # The register() body imports and calls register_memory_provider — this
-        # is the exact pattern documented in memory-provider-plugin.md and used
-        # by third-party plugins such as MemPalace.
-        (plugin_dir / "__init__.py").write_text(
-            "from agent.memory_provider import MemoryProvider\n"
-            "\n"
-            "class _StubProvider(MemoryProvider):\n"
-            "    @property\n"
-            f"    def name(self): return '{name}'\n"
-            "    def is_available(self): return True\n"
-            "    def initialize(self, session_id, **kw): pass\n"
-            "    def get_tool_schemas(self): return []\n"
-            "\n"
-            "def register(ctx):\n"
-            "    ctx.register_memory_provider(_StubProvider())\n"
-        )
-
-    def test_register_memory_provider_succeeds(self, tmp_path, monkeypatch):
-        """A user plugin calling register_memory_provider() loads without error."""
-        plugins_dir = tmp_path / "hermes_test" / "plugins"
-        self._make_memory_plugin(plugins_dir, "mempalace")
-        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
-
-        mgr = PluginManager()
-        mgr.discover_and_load()
-
-        assert "mempalace" in mgr._plugins
-        assert mgr._plugins["mempalace"].enabled, (
-            mgr._plugins["mempalace"].error
-        )
-
-    def test_plugin_memory_provider_stored(self, tmp_path, monkeypatch):
-        """The provider instance is accessible via get_plugin_memory_provider()."""
-        import hermes_cli.plugins as plugins_mod
-
-        plugins_dir = tmp_path / "hermes_test" / "plugins"
-        self._make_memory_plugin(plugins_dir, "mempalace")
-        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
-
-        mgr = PluginManager()
-        # Swap the singleton so get_plugin_memory_provider() sees our manager
-        monkeypatch.setattr(plugins_mod, "_plugin_manager", mgr)
-        mgr.discover_and_load()
-
-        provider = get_plugin_memory_provider()
-        assert provider is not None
-        assert provider.name == "mempalace"
-
-    def test_second_registration_rejected(self, tmp_path, monkeypatch):
-        """Only one plugin-registered memory provider is accepted."""
-        plugins_dir = tmp_path / "hermes_test" / "plugins"
-        self._make_memory_plugin(plugins_dir, "first_provider")
-        self._make_memory_plugin(plugins_dir, "second_provider")
-        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
-
-        mgr = PluginManager()
-        mgr.discover_and_load()
-
-        # The manager should hold exactly one provider
-        assert mgr._plugin_memory_provider is not None
-        assert mgr._plugin_memory_provider.name in {"first_provider", "second_provider"}
-
-    def test_non_provider_rejected(self, tmp_path, monkeypatch):
-        """Passing a non-MemoryProvider object logs a warning and is ignored."""
-        plugins_dir = tmp_path / "hermes_test" / "plugins"
-        plugin_dir = plugins_dir / "bad_provider"
-        plugin_dir.mkdir(parents=True, exist_ok=True)
-        (plugin_dir / "plugin.yaml").write_text("name: bad_provider\n")
-        (plugin_dir / "__init__.py").write_text(
-            "def register(ctx):\n"
-            "    ctx.register_memory_provider('not-a-provider')\n"
-        )
-        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_test"))
-
-        mgr = PluginManager()
-        mgr.discover_and_load()
-
-        # Plugin still loads (warning only), but no provider is stored
-        assert mgr._plugin_memory_provider is None
--- a/tests/test_vision_benchmark.py
+++ b/tests/test_vision_benchmark.py
@@ -11,12 +11,14 @@ import pytest
 sys.path.insert(0, str(Path(__file__).parent.parent / "benchmarks"))

 from vision_benchmark import (
+    analyze_with_model,
    compute_ocr_accuracy,
    compute_description_completeness,
    compute_structural_accuracy,
    aggregate_results,
    to_markdown,
    generate_sample_dataset,
+    load_dataset,
    MODELS,
    EVAL_PROMPTS,
 )
@@ -197,6 +199,71 @@ class TestMarkdown:


 class TestDataset:
+    def test_repo_dataset_uses_local_image_paths(self):
+        dataset_path = Path(__file__).parent.parent / "benchmarks" / "test_images.json"
+        dataset = json.loads(dataset_path.read_text())
+
+        assert dataset, "benchmark dataset should not be empty"
+        assert all(not entry["url"].startswith(("http://", "https://")) for entry in dataset)
+
+    def test_load_dataset_resolves_relative_local_paths(self, tmp_path):
+        images_dir = tmp_path / "images"
+        images_dir.mkdir()
+        image_path = images_dir / "sample.png"
+        image_path.write_bytes(b"png-bytes")
+
+        dataset_path = tmp_path / "dataset.json"
+        dataset_path.write_text(json.dumps([
+            {
+                "id": "sample",
+                "url": "images/sample.png",
+                "category": "photo",
+                "expected_keywords": [],
+                "expected_structure": {"min_length": 30, "min_sentences": 1},
+            }
+        ]))
+
+        loaded = load_dataset(str(dataset_path))
+
+        assert loaded[0]["url"] == str(image_path.resolve())
+
+    @pytest.mark.asyncio
+    async def test_analyze_with_model_encodes_local_file_as_data_url(self, tmp_path, monkeypatch):
+        image_path = tmp_path / "tiny.png"
+        image_path.write_bytes(
+            bytes.fromhex(
+                "89504E470D0A1A0A"
+                "0000000D49484452000000010000000108060000001F15C489"
+                "0000000D49444154789C6360000002000154A24F5D00000000"
+                "49454E44AE426082"
+            )
+        )
+
+        fake_response = MagicMock()
+        fake_response.raise_for_status.return_value = None
+        fake_response.json.return_value = {
+            "choices": [{"message": {"content": "Looks like a tiny image."}}],
+            "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
+        }
+
+        fake_client = MagicMock()
+        fake_client.post = AsyncMock(return_value=fake_response)
+        fake_ctx = MagicMock()
+        fake_ctx.__aenter__ = AsyncMock(return_value=fake_client)
+        fake_ctx.__aexit__ = AsyncMock(return_value=None)
+
+        monkeypatch.setenv("OPENROUTER_API_KEY", "test-key")
+        with patch("httpx.AsyncClient", return_value=fake_ctx):
+            result = await analyze_with_model(
+                str(image_path),
+                "Describe this image",
+                {"provider": "openrouter", "model_id": "fake/model"},
+            )
+
+        assert result["success"] is True
+        sent_url = fake_client.post.await_args.kwargs["json"]["messages"][0]["content"][1]["image_url"]["url"]
+        assert sent_url.startswith("data:image/png;base64,")
+
    def test_sample_dataset_has_entries(self):
        dataset = generate_sample_dataset()
        assert len(dataset) >= 4