From b7b3c8027891fbb540713bc838e2ccfc1567d049 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Mon, 23 Mar 2026 15:07:13 -0400 Subject: [PATCH] WIP: Gemini Code progress on #953 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Automated salvage commit — agent session ended (exit 124). Work in progress, may need continuation. --- poetry.lock | 26 ++++++- pyproject.toml | 1 + src/timmy/sovereignty/perception_cache.py | 85 +++++++++++++++++++++++ 3 files changed, 109 insertions(+), 3 deletions(-) create mode 100644 src/timmy/sovereignty/perception_cache.py diff --git a/poetry.lock b/poetry.lock index f6dfc263..e9f10366 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2936,10 +2936,9 @@ numpy = ">=1.22,<2.5" name = "numpy" version = "2.4.2" description = "Fundamental package for array computing in Python" -optional = true +optional = false python-versions = ">=3.11" groups = ["main"] -markers = "extra == \"bigbrain\" or extra == \"embeddings\" or extra == \"voice\"" files = [ {file = "numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825"}, {file = "numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1"}, @@ -3347,6 +3346,27 @@ triton = {version = ">=2", markers = "platform_machine == \"x86_64\" and sys_pla [package.extras] dev = ["black", "flake8", "isort", "pytest", "scipy"] +[[package]] +name = "opencv-python" +version = "4.13.0.92" +description = "Wrapper package for OpenCV python bindings." 
+optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:372fe164a3148ac1ca51e5f3ad0541a4a276452273f503441d718fab9c5e5f59"}, + {file = "opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5"}, +] + +[package.dependencies] +numpy = {version = ">=2", markers = "python_version >= \"3.9\""} + [[package]] name = "optimum" version = "2.1.0" @@ -9700,4 +9720,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<4" -content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216" +content-hash = "5af3028474051032bef12182eaa5ef55950cbaeca21d1793f878d54c03994eb0" diff --git a/pyproject.toml b/pyproject.toml index b49140ce..faedaff7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ selenium = { version = ">=4.20.0", optional = true } pytest-randomly 
# NOTE(review): this module was recovered from a git patch whose newlines had
# been collapsed. The patch text that shared these physical lines is kept
# below verbatim so that nothing from the original is lost:
#
#   (pyproject.toml hunk, continued)
#   = { version = ">=3.16.0", optional = true }
#   pytest-xdist = { version = ">=3.5.0", optional = true }
#   anthropic = "^0.86.0"
#   +opencv-python = "^4.13.0.92"
#
#   [tool.poetry.extras]
#   telegram = ["python-telegram-bot"]
#   diff --git a/src/timmy/sovereignty/perception_cache.py b/src/timmy/sovereignty/perception_cache.py
#   new file mode 100644
#   index 00000000..b8e0c87a
#   --- /dev/null
#   +++ b/src/timmy/sovereignty/perception_cache.py
#   @@ -0,0 +1,85 @@
"""Template-matching perception cache.

Stores named image templates, matches them against screenshots with OpenCV
normalized cross-correlation, and persists template metadata as JSON.
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List

import numpy as np

try:
    import cv2
except ImportError:
    # Keep the metadata side of the cache (add/persist/load) usable when the
    # OpenCV bindings cannot be imported; ``match`` reports the problem as soon
    # as it actually needs them.
    cv2 = None

# Confidence a match must exceed when a template does not set its own value.
DEFAULT_THRESHOLD = 0.85


@dataclass
class Template:
    """A named image patch together with the confidence it must exceed."""

    name: str
    image: np.ndarray
    threshold: float = DEFAULT_THRESHOLD

    def __eq__(self, other: object) -> bool:
        """Compare field by field, using ``np.array_equal`` for the image.

        The dataclass-generated ``__eq__`` would evaluate ``image == image``
        in a boolean context, which raises for multi-element arrays.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (
            self.name == other.name
            and self.threshold == other.threshold
            and np.array_equal(self.image, other.image)
        )


@dataclass
class CacheResult:
    """Outcome of a cache lookup.

    ``confidence`` is the score of the reported match. ``state`` is
    ``{"template_name": <name>}`` on a hit and ``None`` on a miss.
    """

    confidence: float
    state: Any | None


def _is_matchable(screenshot: np.ndarray, image: np.ndarray) -> bool:
    """Return True when *image* is non-empty and fits inside *screenshot*."""
    if image.ndim < 2 or image.size == 0 or screenshot.ndim < 2:
        return False
    return (
        image.shape[0] <= screenshot.shape[0]
        and image.shape[1] <= screenshot.shape[1]
    )


class PerceptionCache:
    """Collection of templates backed by a JSON metadata file."""

    def __init__(self, templates_path: Path | str = "data/templates.json"):
        """Remember *templates_path* and load any metadata already stored there."""
        self.templates_path = Path(templates_path)
        self.templates: List[Template] = []
        self.load()

    def match(self, screenshot: np.ndarray) -> CacheResult:
        """Match every usable template against *screenshot*.

        Each template is scored with ``cv2.TM_CCOEFF_NORMED`` and accepted
        when its best score is strictly greater than its own ``threshold``.

        Returns:
            On a hit, the highest-scoring accepted template as
            ``CacheResult(confidence, {"template_name": name})``. On a miss,
            ``CacheResult(best_score_seen, None)``; the score is ``0.0`` when
            no template could be evaluated.

        Raises:
            ImportError: if a usable template exists but OpenCV could not be
                imported.
        """
        # Templates restored by ``load`` carry empty images, and a template
        # larger than the screenshot cannot be slid over it; handing either
        # to cv2.matchTemplate raises, so they are skipped instead.
        usable = [t for t in self.templates if _is_matchable(screenshot, t.image)]
        if not usable:
            return CacheResult(confidence=0.0, state=None)
        if cv2 is None:
            raise ImportError(
                "OpenCV (cv2) is required for template matching but could not be imported"
            )

        best_seen = 0.0
        accepted: Template | None = None
        accepted_score = 0.0
        for template in usable:
            scores = cv2.matchTemplate(screenshot, template.image, cv2.TM_CCOEFF_NORMED)
            _, score, _, _ = cv2.minMaxLoc(scores)
            score = float(score)
            if score > best_seen:
                best_seen = score
            # Strict comparisons keep the first template on ties.
            if score > template.threshold and score > accepted_score:
                accepted, accepted_score = template, score

        if accepted is None:
            return CacheResult(confidence=best_seen, state=None)
        return CacheResult(
            confidence=accepted_score,
            state={"template_name": accepted.name},
        )

    def add(self, templates: List[Template]) -> None:
        """Append *templates* to the in-memory collection."""
        self.templates.extend(templates)

    def persist(self) -> None:
        """Write template names and thresholds to ``templates_path`` as JSON.

        Only metadata is stored; the image arrays are not written.
        """
        self.templates_path.parent.mkdir(parents=True, exist_ok=True)
        payload = [{"name": t.name, "threshold": t.threshold} for t in self.templates]
        with self.templates_path.open("w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    def load(self) -> None:
        """Replace the in-memory templates with the metadata stored on disk.

        A missing file leaves the current templates untouched. Images are not
        persisted, so every loaded template gets an empty image and is skipped
        by ``match`` until a real image is supplied. Entries without a
        ``"threshold"`` key fall back to ``DEFAULT_THRESHOLD``.
        """
        try:
            with self.templates_path.open("r", encoding="utf-8") as f:
                entries = json.load(f)
        except FileNotFoundError:
            return
        self.templates = [
            Template(
                name=entry["name"],
                image=np.array([]),
                threshold=entry.get("threshold", DEFAULT_THRESHOLD),
            )
            for entry in entries
        ]


def crystallize_perception(screenshot: np.ndarray, vlm_response: Any) -> List[Template]:
    """Extract reusable templates from a VLM response (placeholder).

    Not implemented yet: the VLM response format is undefined, so this
    currently ignores both arguments and returns an empty list.
    """
    # Sketch of the intended implementation, kept from the original:
    # templates = []
    # for item in vlm_response.get("items", []):
    #     bbox = item.get("bounding_box")
    #     template_name = item.get("name")
    #     if bbox and template_name:
    #         x1, y1, x2, y2 = bbox
    #         template_image = screenshot[y1:y2, x1:x2]
    #         templates.append(Template(name=template_name, image=template_image))
    # return templates
    return []