WIP: Gemini Code progress on #953
Some checks failed
Tests / lint (pull_request) Failing after 12s
Tests / test (pull_request) Has been skipped

Automated salvage commit — agent session ended (exit 124).
Work in progress, may need continuation.
This commit is contained in:
Alexander Whitestone
2026-03-23 15:07:13 -04:00
parent a4531fa8ac
commit b7b3c80278
3 changed files with 109 additions and 3 deletions

26
poetry.lock generated
View File

@@ -2936,10 +2936,9 @@ numpy = ">=1.22,<2.5"
name = "numpy" name = "numpy"
version = "2.4.2" version = "2.4.2"
description = "Fundamental package for array computing in Python" description = "Fundamental package for array computing in Python"
optional = true optional = false
python-versions = ">=3.11" python-versions = ">=3.11"
groups = ["main"] groups = ["main"]
markers = "extra == \"bigbrain\" or extra == \"embeddings\" or extra == \"voice\""
files = [ files = [
{file = "numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825"}, {file = "numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825"},
{file = "numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1"}, {file = "numpy-2.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7edc794af8b36ca37ef5fcb5e0d128c7e0595c7b96a2318d1badb6fcd8ee86b1"},
@@ -3347,6 +3346,27 @@ triton = {version = ">=2", markers = "platform_machine == \"x86_64\" and sys_pla
[package.extras] [package.extras]
dev = ["black", "flake8", "isort", "pytest", "scipy"] dev = ["black", "flake8", "isort", "pytest", "scipy"]
[[package]]
name = "opencv-python"
version = "4.13.0.92"
description = "Wrapper package for OpenCV python bindings."
optional = false
python-versions = ">=3.6"
groups = ["main"]
files = [
{file = "opencv_python-4.13.0.92-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:caf60c071ec391ba51ed00a4a920f996d0b64e3e46068aac1f646b5de0326a19"},
{file = "opencv_python-4.13.0.92-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:5868a8c028a0b37561579bfb8ac1875babdc69546d236249fff296a8c010ccf9"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bc2596e68f972ca452d80f444bc404e08807d021fbba40df26b61b18e01838a"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:402033cddf9d294693094de5ef532339f14ce821da3ad7df7c9f6e8316da32cf"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:bccaabf9eb7f897ca61880ce2869dcd9b25b72129c28478e7f2a5e8dee945616"},
{file = "opencv_python-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:620d602b8f7d8b8dab5f4b99c6eb353e78d3fb8b0f53db1bd258bb1aa001c1d5"},
{file = "opencv_python-4.13.0.92-cp37-abi3-win32.whl", hash = "sha256:372fe164a3148ac1ca51e5f3ad0541a4a276452273f503441d718fab9c5e5f59"},
{file = "opencv_python-4.13.0.92-cp37-abi3-win_amd64.whl", hash = "sha256:423d934c9fafb91aad38edf26efb46da91ffbc05f3f59c4b0c72e699720706f5"},
]
[package.dependencies]
numpy = {version = ">=2", markers = "python_version >= \"3.9\""}
[[package]] [[package]]
name = "optimum" name = "optimum"
version = "2.1.0" version = "2.1.0"
@@ -9700,4 +9720,4 @@ voice = ["openai-whisper", "piper-tts", "pyttsx3", "sounddevice"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.11,<4" python-versions = ">=3.11,<4"
content-hash = "cc50755f322b8755e85ab7bdf0668609612d885552aba14caf175326eedfa216" content-hash = "5af3028474051032bef12182eaa5ef55950cbaeca21d1793f878d54c03994eb0"

View File

@@ -60,6 +60,7 @@ selenium = { version = ">=4.20.0", optional = true }
pytest-randomly = { version = ">=3.16.0", optional = true } pytest-randomly = { version = ">=3.16.0", optional = true }
pytest-xdist = { version = ">=3.5.0", optional = true } pytest-xdist = { version = ">=3.5.0", optional = true }
anthropic = "^0.86.0" anthropic = "^0.86.0"
opencv-python = "^4.13.0.92"
[tool.poetry.extras] [tool.poetry.extras]
telegram = ["python-telegram-bot"] telegram = ["python-telegram-bot"]

View File

@@ -0,0 +1,85 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Any, List
import cv2
import numpy as np
@dataclass
class Template:
    """A named OpenCV template image with a per-template match threshold."""
    name: str  # identifier reported as the match "state" when this template wins
    image: np.ndarray  # image patch compared against screenshots via cv2.matchTemplate
    # Intended per-template confidence cutoff. NOTE(review): match() currently
    # compares against a hard-coded 0.85 instead of this field — see its TODO.
    threshold: float = 0.85
@dataclass
class CacheResult:
    """Outcome of a PerceptionCache.match() call."""
    confidence: float  # best template-match score found (0.0 when no templates matched)
    state: Any | None  # {"template_name": ...} on a confident match, else None
class PerceptionCache:
    """Caches OpenCV templates and matches them against screenshots.

    Serves as a fast local perception shortcut: when a known template matches a
    screenshot above its threshold, the (presumably expensive) VLM call that
    produced the template can be skipped.
    """

    def __init__(self, templates_path: Path | str = "data/templates.json"):
        """Load any previously persisted template metadata from *templates_path*."""
        self.templates_path = Path(templates_path)
        self.templates: List[Template] = []
        self.load()

    def match(self, screenshot: np.ndarray) -> CacheResult:
        """Match every known template against *screenshot*.

        Returns a CacheResult whose ``state`` names the best-matching template
        when its score clears that template's own ``threshold``, and is ``None``
        otherwise. (Fixes the TODO: the per-template threshold is now honored
        instead of a hard-coded 0.85 for every template.)
        """
        best_confidence = 0.0
        best_template: Template | None = None
        for template in self.templates:
            # Templates restored by load() carry empty images (metadata only);
            # cv2.matchTemplate would raise on them, so skip those entries.
            if template.image.size == 0:
                continue
            scores = cv2.matchTemplate(screenshot, template.image, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, _ = cv2.minMaxLoc(scores)
            if max_val > best_confidence:
                best_confidence = max_val
                best_template = template
        if best_template is not None and best_confidence > best_template.threshold:
            return CacheResult(confidence=best_confidence, state={"template_name": best_template.name})
        return CacheResult(confidence=best_confidence, state=None)

    def add(self, templates: List[Template]):
        """Register additional templates for future match() calls."""
        self.templates.extend(templates)

    def persist(self):
        """Write template metadata (name + threshold) to the templates file.

        Note: this is a simplified persistence mechanism — image data is not
        stored. A more robust solution would store templates as image files
        plus JSON metadata.
        """
        self.templates_path.parent.mkdir(parents=True, exist_ok=True)
        with self.templates_path.open("w") as f:
            json.dump([{"name": t.name, "threshold": t.threshold} for t in self.templates], f, indent=2)

    def load(self):
        """Restore template metadata from disk, if a templates file exists.

        Images are not persisted, so restored templates carry empty arrays;
        match() skips them until real image data is re-attached via add().
        """
        if not self.templates_path.exists():
            return
        with self.templates_path.open("r") as f:
            templates_data = json.load(f)
        self.templates = [
            # .get() keeps metadata files written before "threshold" existed loadable.
            Template(name=t["name"], image=np.array([]), threshold=t.get("threshold", 0.85))
            for t in templates_data
        ]
def crystallize_perception(screenshot: np.ndarray, vlm_response: Any) -> List[Template]:
    """Extract reusable OpenCV templates from a VLM response over *screenshot*.

    Expects *vlm_response* to be a mapping with an ``"items"`` list in which
    each item carries a ``"name"`` and a pixel ``"bounding_box"`` of
    ``(x1, y1, x2, y2)``. NOTE(review): this schema is assumed from the
    original implementation sketch — confirm against the actual VLM output
    format before relying on it.

    Any input that does not follow this shape contributes no templates, so an
    unrecognized response still yields ``[]`` (the previous placeholder
    behavior).
    """
    templates: List[Template] = []
    if not isinstance(vlm_response, dict):
        return templates
    height, width = screenshot.shape[:2]
    for item in vlm_response.get("items", []):
        if not isinstance(item, dict):
            continue
        bbox = item.get("bounding_box")
        name = item.get("name")
        if not bbox or not name or len(bbox) != 4:
            continue
        x1, y1, x2, y2 = (int(v) for v in bbox)
        # Clamp to the screenshot bounds so a slightly-off VLM box cannot
        # produce an out-of-range slice.
        x1, x2 = max(0, x1), min(width, x2)
        y1, y2 = max(0, y1), min(height, y2)
        if x2 <= x1 or y2 <= y1:
            continue  # degenerate or fully out-of-frame box
        templates.append(Template(name=name, image=screenshot[y1:y2, x1:x2]))
    return templates