WIP: Gemini Code progress on #953
Some checks failed
Tests / lint (pull_request) Failing after 12s
Tests / test (pull_request) Has been skipped

Automated salvage commit — agent session ended (exit 124).
Work in progress, may need continuation.
This commit is contained in:
Alexander Whitestone
2026-03-23 15:07:13 -04:00
parent a4531fa8ac
commit b7b3c80278
3 changed files with 109 additions and 3 deletions

26
poetry.lock generated
View File

@@ -2936,10 +2936,9 @@ numpy = ">=1.22,<2.5"
name = "numpy" name = "numpy"
version = "2.4.2" version = "2.4.2"
description = "Fundamental package for array computing in Python" description = "Fundamental package for array computing in Python"
optional = true optional = false
python-versions = ">=3.11" python-versions = ">=3.11"
groups = ["main"] groups = ["main"]
markers = "extra == \"bigbrain\" or extra == \"embeddings\" or extra == \"voice\""
files = [ files = [
{file = "numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825"}, {file = "numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825"},
{file = "numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1"}, {file = "numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1"},
@@ -3347,6 +3346,27 @@ triton = {version = ">=2", markers = "platform_machine == \"x86_64\" and sys_pla
[package.extras] [package.extras]
dev = ["black", "flake8", "isort", "pytest", "scipy"] dev = ["black", "flake8", "isort", "pytest", "scipy"]
[[package]]
name = "opencv-python"
version = "4.13.0.92"
description = "Wrapper package for OpenCV python bindings."
optional = false
python-versions = ">=3.6"
groups = ["main"]
files = [
{file = "opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19"},
{file = "opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5"},
{file = "opencv_python-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:372fe164a3148ac1ca51e5f3ad0541a4a276452273f503441d718fab9c5e5f59"},
{file = "opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5"},
]
[package.dependencies]
numpy = {version = ">=2", markers = "python_version >= \"3.9\""}
[[package]] [[package]]
name = "optimum" name = "optimum"
version = "2.1.0" version = "2.1.0"
@@ -9700,4 +9720,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.11,<4" python-versions = ">=3.11,<4"
content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216" content-hash = "5af3028474051032bef12182eaa5ef55950cbaeca21d1793f878d54c03994eb0"

View File

@@ -60,6 +60,7 @@ selenium = { version = ">=4.20.0", optional = true }
pytest-randomly = { version = ">=3.16.0", optional = true } pytest-randomly = { version = ">=3.16.0", optional = true }
pytest-xdist = { version = ">=3.5.0", optional = true } pytest-xdist = { version = ">=3.5.0", optional = true }
anthropic = "^0.86.0" anthropic = "^0.86.0"
opencv-python = "^4.13.0.92"
[tool.poetry.extras] [tool.poetry.extras]
telegram = ["python-telegram-bot"] telegram = ["python-telegram-bot"]

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List
import cv2
import numpy as np
@dataclass
class Template:
    """A named OpenCV template image with a per-template match threshold."""
    name: str  # identifier reported as the match "state" when this template wins
    image: np.ndarray  # image patch compared against screenshots via cv2.matchTemplate
    # Intended per-template confidence cutoff. NOTE(review): match() currently
    # compares against a hard-coded 0.85 instead of this field — see its TODO.
    threshold: float = 0.85
@dataclass
class CacheResult:
    """Outcome of a PerceptionCache.match() call."""
    confidence: float  # best template-match score found (0.0 when no templates matched)
    state: Any | None  # {"template_name": ...} on a confident match, else None
class PerceptionCache:
    """Caches OpenCV templates and matches them against screenshots.

    Serves as a fast local perception shortcut: when a known template matches a
    screenshot above its threshold, the (presumably expensive) VLM call that
    produced the template can be skipped.
    """

    def __init__(self, templates_path: Path | str = "data/templates.json"):
        """Load any previously persisted template metadata from *templates_path*."""
        self.templates_path = Path(templates_path)
        self.templates: List[Template] = []
        self.load()

    def match(self, screenshot: np.ndarray) -> CacheResult:
        """Match every known template against *screenshot*.

        Returns a CacheResult whose ``state`` names the best-matching template
        when its score clears that template's own ``threshold``, and is ``None``
        otherwise. (Fixes the TODO: the per-template threshold is now honored
        instead of a hard-coded 0.85 for every template.)
        """
        best_confidence = 0.0
        best_template: Template | None = None
        for template in self.templates:
            # Templates restored by load() carry empty images (metadata only);
            # cv2.matchTemplate would raise on them, so skip those entries.
            if template.image.size == 0:
                continue
            scores = cv2.matchTemplate(screenshot, template.image, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, _ = cv2.minMaxLoc(scores)
            if max_val > best_confidence:
                best_confidence = max_val
                best_template = template
        if best_template is not None and best_confidence > best_template.threshold:
            return CacheResult(confidence=best_confidence, state={"template_name": best_template.name})
        return CacheResult(confidence=best_confidence, state=None)

    def add(self, templates: List[Template]):
        """Register additional templates for future match() calls."""
        self.templates.extend(templates)

    def persist(self):
        """Write template metadata (name + threshold) to the templates file.

        Note: this is a simplified persistence mechanism — image data is not
        stored. A more robust solution would store templates as image files
        plus JSON metadata.
        """
        self.templates_path.parent.mkdir(parents=True, exist_ok=True)
        with self.templates_path.open("w") as f:
            json.dump([{"name": t.name, "threshold": t.threshold} for t in self.templates], f, indent=2)

    def load(self):
        """Restore template metadata from disk, if a templates file exists.

        Images are not persisted, so restored templates carry empty arrays;
        match() skips them until real image data is re-attached via add().
        """
        if not self.templates_path.exists():
            return
        with self.templates_path.open("r") as f:
            templates_data = json.load(f)
        self.templates = [
            # .get() keeps metadata files written before "threshold" existed loadable.
            Template(name=t["name"], image=np.array([]), threshold=t.get("threshold", 0.85))
            for t in templates_data
        ]
def crystallize_perception(screenshot: np.ndarray, vlm_response: Any) -> List[Template]:
    """Extract reusable OpenCV templates from a VLM response over *screenshot*.

    Expects *vlm_response* to be a mapping with an ``"items"`` list in which
    each item carries a ``"name"`` and a pixel ``"bounding_box"`` of
    ``(x1, y1, x2, y2)``. NOTE(review): this schema is assumed from the
    original implementation sketch — confirm against the actual VLM output
    format before relying on it.

    Any input that does not follow this shape contributes no templates, so an
    unrecognized response still yields ``[]`` (the previous placeholder
    behavior).
    """
    templates: List[Template] = []
    if not isinstance(vlm_response, dict):
        return templates
    height, width = screenshot.shape[:2]
    for item in vlm_response.get("items", []):
        if not isinstance(item, dict):
            continue
        bbox = item.get("bounding_box")
        name = item.get("name")
        if not bbox or not name or len(bbox) != 4:
            continue
        x1, y1, x2, y2 = (int(v) for v in bbox)
        # Clamp to the screenshot bounds so a slightly-off VLM box cannot
        # produce an out-of-range slice.
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            continue  # degenerate or fully out-of-frame box
        templates.append(Template(name=name, image=screenshot[y1:y2, x1:x2]))
    return templates